def test_generate_tfrecords(train, tmpdir):
    
    created_records = boxes.generate_tfrecords(
        shapefile=test_predictions,
        site=1,
        elevation=100,
        savedir=tmpdir,
        train=train,
        HSI_sensor_path=test_sensor_tile,
        RGB_sensor_path=test_sensor_tile,
        species_label_dict=None,
        RGB_size=100,
        HSI_size=20,
        classes=6)
    
    assert all([os.path.exists(x) for x in created_records])
    
    if train:
        dataset = boxes.tf_dataset(created_records, batch_size=2, mode="ensemble")
    else:
        dataset = boxes.tf_dataset(created_records, batch_size=2, mode="predict")
    
    if train:
        for (HSI, RGB), label_batch in dataset.take(3):
            assert HSI.shape == (2, 20, 20, 3)
            assert RGB.shape == (2, 100, 100, 3)
            assert label_batch.shape == (2, 6)
    else:
        for (HSI, RGB), box_index_batch in dataset.take(3):
            assert HSI.shape == (2, 20, 20, 3)
            assert RGB.shape == (2, 100, 100, 3)
            assert box_index_batch.shape == (2,)
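These tests take a bare train argument; under pytest that is usually supplied by parametrization. A minimal sketch of such a fixture (an assumption, not part of the original source):

import pytest

#hypothetical fixture: run each test once with train=True and once with train=False
@pytest.fixture(params=[True, False])
def train(request):
    return request.param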
Example #2
def test_generate_tfrecords(train, tmpdir):
    created_records = boxes.generate_tfrecords(shapefile=test_predictions,
                                               savedir=tmpdir,
                                               train=train,
                                               sensor_path=test_sensor_tile,
                                               height=20,
                                               width=20,
                                               classes=6)
    assert all([os.path.exists(x) for x in created_records])

    if train:
        dataset = boxes.tf_dataset(created_records, batch_size=2, mode="train")
    else:
        dataset = boxes.tf_dataset(created_records,
                                   batch_size=2,
                                   mode="predict")

    if train:
        for image_batch, label_batch in dataset.take(3):
            assert image_batch.shape == (2, 20, 20, 3)
            assert label_batch.shape == (2, 6)
    else:
        for image_batch, box_index_batch in dataset.take(3):
            assert image_batch.shape == (2, 20, 20, 3)
            assert box_index_batch.shape == (2,)
Example #3
    def read_data(self, mode="train", validation_split=False):
        """Read tfrecord into datasets from config
            Args:
                validation_split: True -> split tfrecords into train test. This overrides the evaluation config!
            """
        self.train_records = glob.glob(
            os.path.join(self.config["train"]["tfrecords"], "*.tfrecord"))

        if len(self.train_records) == 0:
            raise IOError("Cannot find .tfrecords at {}".format(
                self.config["train"]["tfrecords"]))

        if validation_split:
            print("Splitting training set into train-test")
            train_df = pd.Series(self.train_records)
            #Take the head of the series so the split is deterministic between runs
            self.train_split_records = train_df.head(
                int(self.config["train"]["training_fraction"] *
                    train_df.shape[0])).values
            self.test_split_records = train_df[~(
                train_df.isin(self.train_split_records))].values

            #Create training tf.data
            self.train_split = boxes.tf_dataset(
                tfrecords=self.train_split_records,
                batch_size=self.config["train"]["batch_size"],
                shuffle=self.config["train"]["shuffle"],
                mode=mode,
                cores=self.config["cpu_workers"])

            #Create testing tf.data
            self.val_split = boxes.tf_dataset(
                tfrecords=self.test_split_records,
                batch_size=self.config["train"]["batch_size"],
                shuffle=self.config["train"]["shuffle"],
                mode=mode,
                cores=self.config["cpu_workers"])
        else:
            #Create training tf.data
            self.train_split = boxes.tf_dataset(
                tfrecords=self.train_records,
                batch_size=self.config["train"]["batch_size"],
                shuffle=self.config["train"]["shuffle"],
                mode=mode,
                cores=self.config["cpu_workers"])

            #honor config if validation not set
            self.val_split = None
            if self.config["evaluation"]["tfrecords"] is not None:
                self.test_records = glob.glob(
                    os.path.join(self.config["evaluation"]["tfrecords"],
                                 "*.tfrecord"))

                self.val_split = boxes.tf_dataset(
                    tfrecords=self.test_records,
                    batch_size=self.config["train"]["batch_size"],
                    shuffle=self.config["train"]["shuffle"],
                    mode=mode,
                    cores=self.config["cpu_workers"])
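A minimal usage sketch for read_data, assuming trainer is an instance of the surrounding class with a populated config:

#split the training tfrecords into train/test rather than using the evaluation config
trainer.read_data(mode="ensemble", validation_split=True)
for batch in trainer.train_split.take(1):
    print(batch)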
Example #4
def test_generate_records(tmpdir, ensemble_model):
    created_records = boxes.generate_tfrecords(
        shapefile=test_predictions,
        domain=1,
        site=1,
        elevation=100.0,
        savedir=tmpdir,
        HSI_sensor_path=test_hsi_tile,
        RGB_sensor_path=test_sensor_tile,
        species_label_dict=None,
        RGB_size=100,
        HSI_size=10,
        classes=6,
        number_of_sites=10,
        number_of_domains=10,
        ensemble_model=None)

    shp = gpd.read_file(test_predictions)

    dataset = boxes.tf_dataset(created_records, batch_size=1)

    counter = 0
    for batch in dataset:
        counter += 1

    assert counter == shp.shape[0]
Example #5
def test_main():
    created_records = prepare_field_data.main(
        field_data=data_path,
        hyperspectral_dir=hyperspectral_dir,
        RGB_size=height,
        HSI_size=width,
        rgb_dir=rgb_dir,
        hyperspectral_savedir=hyperspectral_savedir,
        extend_box=0.5)

    dataset = boxes.tf_dataset(created_records, batch_size=1, mode="RGB_train")
    iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()

    with tensorflow.Session() as sess:
        labels = []
        counter = 0
        while True:
            try:
                data, label = sess.run(next_element)
                assert data.shape == (1, height, width, 3)
                assert label.shape == (1, 3)

                plt.imshow(data[0].astype("uint8"))
                labels.append(label)
                counter += 1
            except tensorflow.errors.OutOfRangeError:
                break
    input_data = gpd.read_file(data_path)
    assert counter == input_data.shape[0]
Example #6
def test_metadata(created_records):
    dataset = boxes.tf_dataset(created_records, batch_size=2, mode="metadata")
    for data, label_batch in dataset.take(1):
        elevation, site, domain = data
        assert elevation.numpy().shape == (2, )
        assert site.numpy().shape == (2, 10)
        assert domain.numpy().shape == (2, 16)
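Several tests receive created_records as a pytest fixture instead of building the records inline; a minimal sketch of such a fixture, assuming the generate_tfrecords signature from Example #1:

import pytest

#hypothetical fixture mirroring the call in Example #1
@pytest.fixture()
def created_records(tmpdir):
    return boxes.generate_tfrecords(
        shapefile=test_predictions,
        site=1,
        elevation=100,
        savedir=tmpdir,
        train=True,
        HSI_sensor_path=test_sensor_tile,
        RGB_sensor_path=test_sensor_tile,
        species_label_dict=None,
        RGB_size=100,
        HSI_size=20,
        classes=6)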
Example #7
def test_main():
    created_records = prepare_field_data.main(
        field_data=data_path,
        hyperspectral_pool=hyperspec_pool,
        height=height,
        width=width,
        rgb_pool=rgb_pool,
        sensor="rgb",
        hyperspectral_savedir=hyperspectral_savedir,
        use_dask=False,
        extend_box=3)

    dataset = boxes.tf_dataset(created_records, batch_size=1)
    iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()

    with tensorflow.Session() as sess:
        labels = []
        counter = 0
        while True:
            try:
                image, label = sess.run(next_element)
                assert image.shape == (1, height, width, 3)
                assert label.shape == (1, 2)

                plt.imshow(image[0].astype("uint8"))
                labels.append(label)
                counter += 1
            except tensorflow.errors.OutOfRangeError:
                break
    assert counter == 3
    #at least one label should belong to a non-zero class
    assert max([np.argmax(x) for x in labels]) > 0
Example #8
    def predict_raster(self, tfrecords, batch_size=1):
        """Predicted a set of tfrecords and create a raster image"""
        prediction_set = boxes.tf_dataset(tfrecords=tfrecords,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          mode="predict",
                                          cores=self.config["cpu_workers"])

        predictions = []
        row_list = []
        col_list = []
        for image, x, y in prediction_set:
            try:
                softmax_batch = self.model.predict_on_batch(image)
                row_list.append(x.numpy())
                col_list.append(y.numpy())
                predictions.append(softmax_batch)
            except tf.errors.OutOfRangeError:
                print("Completed {} predictions".format(len(predictions)))

        #stack
        predictions = np.vstack(predictions)
        row_list = np.concatenate(row_list)
        col_list = np.concatenate(col_list)
        predictions = np.argmax(predictions, 1)
        results = pd.DataFrame({
            "label": predictions,
            "row": row_list,
            "col": col_list
        })
        results = results.sort_values(by=["row", "col"])

        return results
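The sorted (row, col, label) table can then be reshaped into a 2-D label raster; a sketch assuming each (row, col) cell was predicted exactly once:

#pivot the long table into a 2-D array of class labels
label_raster = results.pivot(index="row", columns="col", values="label").values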
Example #9
    def predict_boxes(self, tfrecords, batch_size=1):
        """Predicted a set of tfrecords and create a raster image"""
        prediction_set = boxes.tf_dataset(tfrecords=tfrecords,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          mode="predict",
                                          cores=self.config["cpu_workers"])

        predictions = []
        indices = []
        for image, box_index in prediction_set:
            try:
                softmax_batch = self.model.predict_on_batch(image)
                predictions.append(softmax_batch)
                indices.append(box_index)
            except tf.errors.OutOfRangeError:
                print("Completed {} predictions".format(len(predictions)))

        #stack
        predictions = np.vstack(predictions)
        predictions = np.argmax(predictions, 1)

        indices = np.concatenate(indices)

        #Read class labels
        labels = [
            self.classes_file.loc[self.classes_file.index == x,
                                  "taxonID"].values[0] for x in predictions
        ]
        results = pd.DataFrame({"label": labels, "box_index": indices})

        #decode byte-string box indices into plain str
        results["box_index"] = results["box_index"].apply(
            lambda x: x.decode()).astype(str)

        return results
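To attach these labels back to the source crowns, the results can be joined on box_index; a sketch assuming the "{shapefile basename}_{row index}" encoding seen in Example #14 below:

import os
import geopandas as gpd

shp = gpd.read_file(test_predictions)
basename = os.path.splitext(os.path.basename(test_predictions))[0]
shp["box_index"] = ["{}_{}".format(basename, x) for x in shp.index.values]
shp = shp.merge(results, on="box_index")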
Example #10
def test_ensemble(created_records):
    dataset = boxes.tf_dataset(created_records, batch_size=2, mode="ensemble")
    for data, label_batch in dataset.take(1):
        HSI, elevation, site, domain = data
        assert HSI.shape == (2, 40, 40, 369)
        assert elevation.numpy().shape == (2, )
        assert site.numpy().shape == (2, 10)
        assert domain.numpy().shape == (2, 16)
Example #11
def test_RGB_submodel(created_records):
    dataset = boxes.tf_dataset(created_records,
                               batch_size=2,
                               mode="RGB_submodel")
    for batch in dataset.take(1):
        data, label = batch
        assert data.shape == (2, 100, 100, 3)
        assert len(label) == 3
        assert label[0].shape == (2, 6)
Example #12
def test_neighbor(created_records):
    dataset = boxes.tf_dataset(created_records, batch_size=2, mode="neighbors")
    for data, label_batch in dataset.take(1):
        HSI, neighbor_array, elevation, site, domain = data

        assert HSI.shape == (2, 20, 20, 369)
        assert neighbor_array.shape == (2, 5, 4)
        assert elevation.numpy().shape == (2, )
        assert site.numpy().shape == (2, 10)
        assert domain.numpy().shape == (2, 10)
Example #13
def test_id_train(created_records):
    shp = gpd.read_file(test_predictions)
    dataset = boxes.tf_dataset(created_records,
                               batch_size=2,
                               ids=True,
                               mode="RGB")
    for ids, batch in dataset.take(1):
        data, label = batch
        assert ids.numpy().shape == (2, )

    assert all([x in shp.index.values for x in ids.numpy()])
Example #14
def test_id_train(created_records):
    shp = gpd.read_file(test_predictions)
    dataset = boxes.tf_dataset(created_records,
                               batch_size=2,
                               ids=True,
                               mode="RGB")
    for ids, batch in dataset.take(1):
        data, label = batch
        assert ids.numpy().shape == (2, )

    basename = os.path.splitext(os.path.basename(test_predictions))[0]
    shp["box_index"] = ["{}_{}".format(basename, x) for x in shp.index.values]
    assert all([x in shp.box_index.values for x in ids.numpy()])
Example #15
def test_generate_tfrecords(train, created_records):
    assert all([os.path.exists(x) for x in created_records])

    if train:
        dataset = boxes.tf_dataset(created_records, mode="RGB", batch_size=2)
    else:
        dataset = boxes.tf_dataset(created_records,
                                   mode="RGB",
                                   batch_size=2,
                                   ids=True)

    if train:
        #Yield a batch of data and confirm its shape
        for batch in dataset.take(1):
            data, label = batch
            assert data.shape == (2, 100, 100, 3)
            assert label.shape == (2, 6)
    else:
        for ids, batch in dataset.take(3):
            data, label = batch
            assert data.shape == (2, 100, 100, 3)
            assert ids.shape == (2,)
Example #16
def test_generate_tfrecords(train, tmpdir):
    
    shp = gpd.read_file(test_predictions)
    created_records = boxes.generate_tfrecords(
        shapefile=test_predictions,
        site=1,
        elevation=100,
        heights=np.random.random(shp.shape[0])*10,
        savedir=tmpdir,
        train=train,
        HSI_sensor_path=test_sensor_tile,
        RGB_sensor_path=test_sensor_tile,
        species_label_dict=None,
        RGB_size=100,
        HSI_size=20,
        classes=6,
        number_of_sites=10)
    
    assert all([os.path.exists(x) for x in created_records])
    
    if train:
        dataset = boxes.tf_dataset(created_records, batch_size=2, mode="ensemble")
    else:
        dataset = boxes.tf_dataset(created_records, batch_size=2, mode="predict")
    
    if train:
        #Yield a batch of data and confirm its shape
        for (HSI, RGB, elevation, height, site), label_batch in dataset.take(3):
            assert HSI.shape == (2, 20, 20, 3)
            assert RGB.shape == (2, 100, 100, 3)
            assert elevation.shape == (2,)
            assert site.shape == (2, 10)
            assert height.shape == (2,)
            assert label_batch.shape == (2, 6)
    else:
        for (HSI, RGB), box_index_batch in dataset.take(3):
            assert HSI.shape == (2, 20, 20, 3)
            assert RGB.shape == (2, 100, 100, 3)
            assert box_index_batch.shape == (2,)
Example #17
def test_generate(mod):
    shp = gpd.read_file(test_predictions)
    created_records = mod.generate(shapefile=test_predictions,
                                   domain=1,
                                   site=0,
                                   elevation=100,
                                   HSI_sensor_path=test_sensor_hyperspec,
                                   RGB_sensor_path=test_sensor_tile,
                                   train=True,
                                   chunk_size=2,
                                   savedir=mod.config["train"]["tfrecords"])

    assert all([os.path.exists(x) for x in created_records])

    dataset = boxes.tf_dataset(created_records, batch_size=1)
    counter = 0
    for batch in dataset:
        counter += 1

    assert counter == shp.shape[0]
Example #18
def test_metadata(tmpdir):
    shp = gpd.read_file(test_predictions)
    
    created_records = boxes.generate_tfrecords(
        shapefile=test_predictions,
        site=1,
        heights=np.random.random(shp.shape[0]) * 10,
        elevation=100,
        savedir=tmpdir,
        HSI_sensor_path=test_sensor_tile,
        RGB_sensor_path=test_sensor_tile,
        species_label_dict=None,
        RGB_size=100,
        HSI_size=20,
        classes=6,
        number_of_sites=10)
    
    dataset = boxes.tf_dataset(created_records, batch_size=2, mode="metadata")
    for data, label_batch in dataset.take(3):
        elevation, height, site = data
        assert elevation.numpy().shape == (2,)
        assert site.numpy().shape == (2, 10)
Example #19
    def find_outliers(self):
        self.autoencoder_model = cleaning.autoencoder_model(
            height=self.HSI_size,
            width=self.HSI_size,
            channels=self.HSI_channels)
        #batch size is already set in the tf.data pipeline, so it is not passed to fit()
        self.autoencoder_model.fit(
            self.train_split,
            epochs=self.config["autoencoder"]["epochs"],
            validation_data=self.val_split)

        ## training data ##
        self.train_split_with_ids = boxes.tf_dataset(
            tfrecords=self.train_records,
            batch_size=self.config["train"]["batch_size"],
            shuffle=False,
            mode="HSI_autoencoder",
            ids=True,
            cache=False,
            augmentation=False,
            cores=self.config["cpu_workers"])

        #Compute per-box reconstruction error; shuffle=False keeps box indices aligned
        y_pred = []
        box_index = []

        mse = tf.keras.losses.MeanSquaredError()
        for index, batch in self.train_split_with_ids:
            data, label = batch
            prediction = self.autoencoder_model.predict(data)
            for x in np.arange(prediction.shape[0]):
                error = mse(prediction[x, :, :, :], data[x, :, :, :])
                y_pred.append(error.numpy())
                box_index.append(index.numpy()[x])

        results = pd.DataFrame({"error": y_pred, "point_id": box_index})

        #Merge reconstruction errors back onto the original training data
        joined_gdf = self.train_shp.merge(results, on="point_id")

        #outlier threshold
        threshold = joined_gdf.error.quantile(
            self.config["autoencoder"]["quantile"])
        train_error_df = joined_gdf[joined_gdf.error > threshold]

        ## repeat for test data ##
        #Compute per-box reconstruction error; shuffle=False keeps box indices aligned
        y_pred = []
        box_index = []

        mse = tf.keras.losses.MeanSquaredError()
        for index, batch in self.val_split_with_ids:
            data, label = batch
            prediction = self.autoencoder_model.predict(data)
            for x in np.arange(prediction.shape[0]):
                error = mse(prediction[x, :, :, :], data[x, :, :, :])
                y_pred.append(error.numpy())
                box_index.append(index.numpy()[x])

        results = pd.DataFrame({"error": y_pred, "point_id": box_index})

        #Merge reconstruction errors back onto the original test data
        joined_gdf = self.test_shp.merge(results, on="point_id")

        #outlier threshold
        test_error_df = joined_gdf[joined_gdf.error > threshold]

        return train_error_df, test_error_df
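A usage sketch, assuming trainer has already run read_data and set up the attributes referenced above (train_shp, test_shp, val_split_with_ids):

train_outliers, test_outliers = trainer.find_outliers()
print("Flagged {} training and {} test outliers".format(
    train_outliers.shape[0], test_outliers.shape[0]))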
Example #20
import glob
from DeepTreeAttention.generators import boxes
created_records = glob.glob(
    "/orange/idtrees-collab/DeepTreeAttention/tfrecords/train/*.tfrecord")
dataset = boxes.tf_dataset(created_records, batch_size=100)
counter = 0
for image, label in dataset:
    counter += image.shape[0]
print("Total training samples: {}".format(counter))
Example #21
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from DeepTreeAttention.generators import boxes

#metadata
created_records = glob.glob("/orange/idtrees-collab/DeepTreeAttention/tfrecords/evaluation/*.tfrecord")
dataset = boxes.tf_dataset(created_records, mode="metadata", batch_size=10)
counter = 0
labels = []
for data, label in dataset:
    counter += data[0].shape[0]
    labels.append(label)
    print(counter)

labels = np.concatenate(labels)
labels = np.argmax(labels, 1)
pd.DataFrame({"label": labels}).groupby("label").size()
Example #22
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from DeepTreeAttention.generators import boxes

#metadata
created_records = glob.glob("/orange/idtrees-collab/DeepTreeAttention/tfrecords/evaluation/*.tfrecord")
dataset = boxes.tf_dataset(created_records, mode="metadata", batch_size=256)
counter = 0
labels = []
data = []
for metadata, label in dataset:
    counter += metadata.shape[0]
    print(counter)
    labels.append(label)
    data.append(metadata)

created_records = glob.glob("/orange/idtrees-collab/DeepTreeAttention/tfrecords/train/*.tfrecord")
dataset = boxes.tf_dataset(created_records, mode="ensemble", batch_size=256)
counter = 0
labels = []
data = []
for (HSI, RGB, elevation, height, sites), label in dataset:
    counter += RGB.shape[0]
    labels.append(label)
    data.append(RGB)

labels = np.vstack(labels)
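The script stops after stacking the one-hot labels; mirroring Example #21, a short follow-up (assuming the same one-hot encoding) summarizes the class balance:

labels = np.argmax(labels, 1)
print(pd.DataFrame({"label": labels}).groupby("label").size())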