def run(record):
    att = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml")
    ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    site_classes_file = "{}/data/processed/site_class_labels.csv".format(ROOT)
    site_classdf = pd.read_csv(site_classes_file)
    site_label_dict = site_classdf.set_index("siteID").label.to_dict()

    domain_classes_file = "{}/data/processed/domain_class_labels.csv".format(ROOT)
    domain_classdf = pd.read_csv(domain_classes_file)
    domain_label_dict = domain_classdf.set_index("domainID").label.to_dict()

    species_classes_file = "{}/data/processed/species_class_labels.csv".format(ROOT)
    species_classdf = pd.read_csv(species_classes_file)
    species_label_dict = species_classdf.set_index("taxonID").label.to_dict()

    rgb_pool = glob.glob(att.config["rgb_sensor_pool"], recursive=True)
    hyperspectral_pool = glob.glob(att.config["hyperspectral_sensor_pool"], recursive=True)

    #Convert h5 hyperspec
    renamed_record = record.replace("itc_predictions", "image")
    hyperspec_path = lookup_and_convert(
        shapefile=renamed_record,
        rgb_pool=rgb_pool,
        hyperspectral_pool=hyperspectral_pool,
        savedir=att.config["hyperspectral_tif_dir"])
    rgb_path = find_sensor_path(shapefile=renamed_record, lookup_pool=rgb_pool)

    #infer site
    site = site_from_path(renamed_record)
    numeric_site = site_label_dict[site]

    domain = domain_from_path(renamed_record)
    numeric_domain = domain_label_dict[domain]

    #infer elevation
    h5_path = find_sensor_path(shapefile=renamed_record, lookup_pool=hyperspectral_pool)
    elevation = elevation_from_tile(h5_path)

    #Generate record when complete
    df = pd.read_csv(record)

    #hot fix the heights for the moment.
    heights = np.repeat(10, df.shape[0])

    tfrecords = att.generate(
        csv_file=record,
        HSI_sensor_path=hyperspec_path,
        RGB_sensor_path=rgb_path,
        chunk_size=500,
        train=True,
        domain=numeric_domain,
        site=numeric_site,
        heights=heights,
        elevation=elevation,
        label_column="filtered_taxonID",
        species_label_dict=species_label_dict)

    return tfrecords
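#----------------------------------------------------------------------
#Illustrative sketch only (not from the original script): one way to
#dispatch run() over a directory of weak-label csv records, reusing the
#dask helper (DeepTreeAttention.utils.start_cluster.start) that the
#tfrecord-generation script in this repo already uses. The record glob
#pattern below is hypothetical.
#----------------------------------------------------------------------
if __name__ == "__main__":
    import glob
    from distributed import wait
    from DeepTreeAttention.utils.start_cluster import start

    client = start(cpus=10, mem_size="5GB")
    records = glob.glob("/orange/idtrees-collab/DeepTreeAttention/WeakLabels/*.csv")

    #submit one generation job per record and block until all finish
    futures = [client.submit(run, record) for record in records]
    wait(futures)
    generated = [future.result() for future in futures]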
def run(record, savedir, raw_box_dir):
    """Take a plot of deepforest predictions (see prepare_field_data.py) and generate crops for training/evaluation"""
    #Read record
    df = gpd.read_file(record)

    #get bounding boxes from the surrounding trees
    basename = os.path.basename(record)
    raw_boxes = "{}/{}".format(raw_box_dir, basename)

    att = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml")
    ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    site_classes_file = "{}/data/processed/site_class_labels.csv".format(ROOT)
    site_classdf = pd.read_csv(site_classes_file)
    site_label_dict = site_classdf.set_index("siteID").label.to_dict()

    domain_classes_file = "{}/data/processed/domain_class_labels.csv".format(ROOT)
    domain_classdf = pd.read_csv(domain_classes_file)
    domain_label_dict = domain_classdf.set_index("domainID").label.to_dict()

    species_classes_file = "{}/data/processed/species_class_labels.csv".format(ROOT)
    species_classdf = pd.read_csv(species_classes_file)
    species_label_dict = species_classdf.set_index("taxonID").label.to_dict()

    rgb_pool = glob.glob(att.config["rgb_sensor_pool"], recursive=True)
    hyperspectral_pool = glob.glob(att.config["hyperspectral_sensor_pool"], recursive=True)

    #Convert h5 hyperspec
    hyperspec_path = lookup_and_convert(
        bounds=df.total_bounds,
        rgb_pool=rgb_pool,
        hyperspectral_pool=hyperspectral_pool,
        savedir=att.config["hyperspectral_tif_dir"])
    rgb_path = find_sensor_path(bounds=df.total_bounds, lookup_pool=rgb_pool)

    #infer site, only 1 per plot.
    site = df.siteID.unique()[0]
    numeric_site = site_label_dict[site]

    domain = df.domainID.unique()[0]
    numeric_domain = domain_label_dict[domain]

    #infer elevation
    h5_path = find_sensor_path(bounds=df.total_bounds, lookup_pool=hyperspectral_pool)
    elevation = elevation_from_tile(h5_path)

    att.ensemble_model = tf.keras.models.load_model(
        "{}/Ensemble.h5".format(att.config["neighbors"]["model_dir"]),
        custom_objects={"WeightedSum": WeightedSum})
    ensemble_model = tf.keras.Model(att.ensemble_model.inputs,
                                    att.ensemble_model.get_layer("ensemble_learn").output)

    #Generate record when complete
    tfrecords = att.generate(
        shapefile=record,
        raw_boxes=raw_boxes,
        HSI_sensor_path=hyperspec_path,
        RGB_sensor_path=rgb_path,
        chunk_size=500,
        train=True,
        domain=numeric_domain,
        site=numeric_site,
        elevation=elevation,
        label_column="taxonID",
        species_label_dict=species_label_dict,
        ensemble_model=None,
        savedir=savedir)

    return tfrecords
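#----------------------------------------------------------------------
#Illustrative sketch only: the feature-extractor pattern used above
#(and again in the test fixtures, which cut at "submodel_concat"),
#shown on a tiny stand-alone Keras model so the idea is clear. The
#layer names here are made up.
#----------------------------------------------------------------------
import tensorflow as tf

inputs = tf.keras.Input(shape=(8,), name="toy_input")
hidden = tf.keras.layers.Dense(4, activation="relu", name="penultimate")(inputs)
outputs = tf.keras.layers.Dense(2, activation="softmax", name="softmax_out")(hidden)
toy_model = tf.keras.Model(inputs, outputs)

#Cutting the graph at a named intermediate layer yields a new model that
#reuses the trained weights but returns features instead of class scores.
feature_extractor = tf.keras.Model(toy_model.inputs,
                                   toy_model.get_layer("penultimate").output)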
#Linear metadata model for testing purposes
from comet_ml import Experiment
import tensorflow as tf
from DeepTreeAttention.trees import AttentionModel
from DeepTreeAttention.models import metadata
from DeepTreeAttention.callbacks import callbacks
import pandas as pd

model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml")
model.create()

#Log config
experiment = Experiment(project_name="neontrees", workspace="bw4sz")
experiment.log_parameters(model.config["train"])
experiment.log_parameters(model.config["evaluation"])
experiment.log_parameters(model.config["predict"])
experiment.add_tag("metadata")

##Train
#Train see config.yml for tfrecords path with weighted classes in cross entropy
model.read_data(mode="metadata")

#Create the metadata-only model
inputs, outputs = metadata.metadata_model(classes=model.config["train"]["classes"])
meta_model = tf.keras.Model(inputs=inputs, outputs=outputs, name="DeepTreeAttention")
meta_model.compile(
    loss='categorical_crossentropy',
    #the original snippet is truncated after the loss argument; optimizer and
    #metrics below are assumed, following the sibling training scripts.
    optimizer=tf.keras.optimizers.Adam(lr=float(model.config["train"]["learning_rate"])),
    metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc')])
if __name__ == "__main__": sleep(randint(0,20)) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") save_dir = "{}/{}".format("/orange/idtrees-collab/DeepTreeAttention/snapshots/",timestamp) os.mkdir(save_dir) experiment = Experiment(project_name="neontrees", workspace="bw4sz") experiment.add_tag("Train") #Create output folder experiment.log_parameter("timestamp",timestamp) experiment.log_parameter("log_dir",save_dir) #Create a class and run model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml", log_dir=save_dir) model.create() if model.config["train"]["pretraining_dir"]: model.HSI_model.load_weights("{}/HSI_model.h5".format(model.config["train"]["pretraining_dir"])) #Log config experiment.log_parameters(model.config["train"]) experiment.log_parameters(model.config["evaluation"]) experiment.log_parameters(model.config["predict"]) experiment.log_parameters(model.config["train"]["ensemble"]) ##Train #Train see config.yml for tfrecords path with weighted classes in cross entropy model.read_data(mode="HSI")
from time import sleep
from random import randint
from datetime import datetime
import os

from comet_ml import Experiment
from DeepTreeAttention.trees import AttentionModel

sleep(randint(0, 20))
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_dir = "{}/{}".format("/orange/idtrees-collab/DeepTreeAttention/snapshots/", timestamp)
os.mkdir(save_dir)

experiment = Experiment(project_name="neontrees", workspace="bw4sz")
experiment.add_tag("Cleaning")

#Create output folder
experiment.log_parameter("timestamp", timestamp)
experiment.log_parameter("log_dir", save_dir)

#Create a class and run
model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml", log_dir=save_dir)
model.read_data("HSI_autoencoder")
train_df, test_df = model.find_outliers()
train_df.to_file("{}/train_outliers.shp".format(save_dir))
test_df.to_file("{}/test_outliers.shp".format(save_dir))
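#----------------------------------------------------------------------
#Illustrative sketch only: a quick way to inspect the flagged outliers
#written above. The files contain whatever find_outliers() returns;
#only the paths come from the script itself.
#----------------------------------------------------------------------
import geopandas as gpd

train_outliers = gpd.read_file("{}/train_outliers.shp".format(save_dir))
test_outliers = gpd.read_file("{}/test_outliers.shp".format(save_dir))
print("train outliers: {}, test outliers: {}".format(
    train_outliers.shape[0], test_outliers.shape[0]))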
def mod(tmpdir):
    mod = AttentionModel(config="conf/tree_config.yml")
    mod.sites = 2
    mod.domains = 2
    mod.RGB_channels = 3
    mod.HSI_channels = 3
    train_dir = tmpdir.mkdir("train")
    label_file = "{}/label_file.csv".format(train_dir)
    mod.config["train"]["tfrecords"] = train_dir
    mod.classes_file = "data/processed/species_class_labels.csv"

    domain = "D17"
    numeric_domain = domain_label_dict[domain]
    site = "BART"
    numeric_site = site_label_dict[site]

    created_records = mod.generate(
        shapefile=test_predictions,
        site=numeric_site,
        domain=numeric_domain,
        elevation=100/1000,
        HSI_sensor_path=test_sensor_tile,
        RGB_sensor_path=test_sensor_tile,
        train=True,
        chunk_size=2,
        savedir=mod.config["train"]["tfrecords"])

    #create a fake label file
    pd.DataFrame({"taxonID": ["Ben", "Jon"], "label": [0, 1]}).to_csv(label_file)
    mod.classes_file = label_file
    mod.create()
    mod.ensemble(experiment=None)

    return mod
_predict_(shapefile, model_path, savedir=savedir, create_records=generate)

if __name__ == "__main__":
    experiment = Experiment(project_name="neontrees", workspace="bw4sz")

    #Create output folder
    #Sleep for a moment to allow queries to build up in SLURM queue
    sleep(randint(0, 10))
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_dir = "{}/{}".format("/orange/idtrees-collab/DeepTreeAttention/snapshots/", timestamp)
    os.mkdir(save_dir)
    experiment.log_parameter("timestamp", timestamp)

    #Create a class and run
    model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml")
    model.create()

    #Log config
    experiment.log_parameters(model.config["train"])
    experiment.log_parameters(model.config["predict"])

    ##Train
    #Train see config.yml for tfrecords path with weighted classes in cross entropy
    model.read_data(validation_split=True)
    class_weight = model.calc_class_weight()

    ##Train subnetwork
    experiment.log_parameter("Train subnetworks", True)
    with experiment.context_manager("spatial_subnetwork"):
        print("Train spatial subnetwork")
#Linear metadata model for testing purposes
from comet_ml import Experiment
import tensorflow as tf
from DeepTreeAttention.trees import AttentionModel
from DeepTreeAttention.models import metadata
from DeepTreeAttention.callbacks import callbacks
import pandas as pd
from tensorflow.keras.models import load_model
from DeepTreeAttention.models.layers import WeightedSum
from random import randint
from time import sleep
from datetime import datetime
import os

model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml")
model.create()

sleep(randint(0, 20))
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_dir = "{}/{}".format("/orange/idtrees-collab/DeepTreeAttention/snapshots/", timestamp)
os.mkdir(save_dir)

#Log config
experiment = Experiment(project_name="neontrees", workspace="bw4sz")
experiment.log_parameters(model.config["train"])
experiment.log_parameters(model.config["evaluation"])
experiment.log_parameters(model.config["predict"])
experiment.add_tag("RGB")
experiment.log_parameter("timestamp", timestamp)
def mod(tmpdir):
    mod = AttentionModel(config="conf/tree_config.yml")
    mod.sites = 2
    mod.domains = 2
    mod.RGB_channels = 3
    mod.HSI_channels = 3
    train_dir = tmpdir.mkdir("train")
    label_file = "{}/label_file.csv".format(train_dir)
    shp = gpd.read_file(test_predictions)
    mod.config["train"]["tfrecords"] = train_dir
    mod.classes_file = "data/processed/species_class_labels.csv"

    created_records = mod.generate(
        shapefile=test_predictions,
        site=0,
        domain=1,
        elevation=100,
        heights=np.random.random(shp.shape[0]),
        HSI_sensor_path=test_sensor_tile,
        RGB_sensor_path=test_sensor_tile,
        train=True,
        chunk_size=2)

    #create a fake label file
    pd.DataFrame({"taxonID": ["Ben", "Jon"], "label": [0, 1]}).to_csv(label_file)
    mod.classes_file = label_file
    mod.create()
    mod.ensemble(experiment=None, train=False)

    #turn ensemble model into a feature extractor of the 2nd to last layer.
    mod.ensemble_model = tfk.Model(mod.ensemble_model.inputs,
                                   mod.ensemble_model.get_layer("submodel_concat").output)

    return mod
sleep(randint(0, 20))
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_dir = "{}/{}".format("/orange/idtrees-collab/DeepTreeAttention/snapshots/", timestamp)
os.mkdir(save_dir)

experiment = Experiment(project_name="neontrees", workspace="bw4sz")
experiment.add_tag("Train")

#Create output folder
experiment.log_parameter("timestamp", timestamp)
experiment.log_parameter("log_dir", save_dir)

#Create a class and run
model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml", log_dir=save_dir)
model.read_data("HSI")
model.create()

baseline = vanilla.create(
    height=model.config["train"]["HSI"]["crop_size"],
    width=model.config["train"]["HSI"]["crop_size"],
    channels=model.config["train"]["HSI"]["sensor_channels"],
    classes=model.classes)

baseline.compile(
    loss="categorical_crossentropy",
    optimizer=tf.keras.optimizers.Adam(lr=float(model.config["train"]["learning_rate"])),
    metrics=[tf.keras.metrics.CategoricalAccuracy(name='acc')])

labeldf = pd.read_csv(model.classes_file)
sleep(randint(0, 10))
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_dir = "{}/{}".format("/orange/idtrees-collab/DeepTreeAttention/snapshots/", timestamp)
os.mkdir(save_dir)

experiment = Experiment(project_name="neontrees", workspace="bw4sz")
experiment.add_tag("neighbors")

#Create output folder
experiment.log_parameter("timestamp", timestamp)
experiment.log_parameter("log_dir", save_dir)

#Create a class and run
model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml", log_dir=save_dir)
model.create()

ensemble_model = tfk.models.load_model(
    "{}/Ensemble.h5".format(model.config["neighbors"]["model_dir"]),
    custom_objects={"WeightedSum": WeightedSum})

model.read_data("neighbors")
experiment.log_parameters(model.config["neighbors"])

neighbor = neighbors_model.create(
    ensemble_model=ensemble_model,
    k_neighbors=model.config["neighbors"]["k_neighbors"],
    classes=model.classes,
    freeze=model.config["neighbors"]["freeze"])
def mod(tmpdir):
    mod = AttentionModel(config="conf/tree_config.yml")
    train_dir = tmpdir.mkdir("train")
    predict_dir = tmpdir.mkdir("predict")
    label_file = "{}/label_file.csv".format(train_dir)

    #create a fake label file
    pd.DataFrame({"taxonID": ["Ben", "Jon"], "label": [0, 1]}).to_csv(label_file)

    config = {}
    train_config = {}
    train_config["tfrecords"] = train_dir
    train_config["batch_size"] = 1
    train_config["epochs"] = 1
    train_config["steps"] = 1
    train_config["gpus"] = 1
    train_config["crop_size"] = 20
    train_config["shuffle"] = True
    train_config["weighted_sum"] = False
    train_config["classes"] = 2
    train_config["species_class_file"] = label_file

    #evaluation
    eval_config = {}
    eval_config["tfrecords"] = None
    eval_config["steps"] = 1
    eval_config["ground_truth_path"] = "data/processed/test.shp"

    predict_config = {}
    predict_config["tfrecords"] = predict_dir

    config["train"] = train_config
    config["evaluation"] = eval_config
    config["predict"] = predict_config

    #Replace config for testing env
    for key, value in config.items():
        for nested_key, nested_value in value.items():
            mod.config[key][nested_key] = nested_value

    #Update the inits
    mod.RGB_size = mod.config["train"]["RGB"]["crop_size"]
    mod.HSI_size = mod.config["train"]["HSI"]["crop_size"]
    mod.HSI_channels = 369
    mod.RGB_channels = 3
    mod.extend_HSI_box = mod.config["train"]["HSI"]["extend_box"]
    mod.classes_file = label_file

    mod.train_shp = pd.DataFrame({
        "taxonID": ["Jon", "Ben"],
        "siteID": [0, 1],
        "domainID": [0, 1],
        "plotID": [0, 1],
        "canopyPosition": ["a", "b"],
        "scientific": ["genus species", "genus species"]
    })
    mod.train_shp.index = [2, 7]

    mod.sites = 23
    mod.domains = 15

    #Create a model with input sizes
    mod.create()

    return mod
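#----------------------------------------------------------------------
#Illustrative sketch only: the mod(tmpdir) factories above look like
#pytest fixtures (tmpdir is pytest's built-in temporary-directory
#fixture). If so, they would be declared and consumed roughly like
#this; the test name and assertion are made up for illustration.
#----------------------------------------------------------------------
import pytest


@pytest.fixture()
def mod_fixture(tmpdir):
    return mod(tmpdir)


def test_model_is_created(mod_fixture):
    #the fixture should hand back a fully constructed AttentionModel
    assert mod_fixture.classes_file is not None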
#Linear metadata model for testing purposes
from comet_ml import Experiment
import tensorflow as tf
from DeepTreeAttention.trees import AttentionModel
from DeepTreeAttention.models import metadata
from DeepTreeAttention.callbacks import callbacks
import pandas as pd

model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml")
model.create()

#Log config
experiment = Experiment(project_name="neontrees", workspace="bw4sz")
experiment.log_parameters(model.config["train"])
experiment.log_parameters(model.config["evaluation"])
experiment.log_parameters(model.config["predict"])
experiment.add_tag("metadata")

##Train
#Train see config.yml for tfrecords path with weighted classes in cross entropy
with experiment.context_manager("metadata"):
    model.read_data(mode="metadata")
    class_weight = model.calc_class_weight()
    model.train(submodel="metadata", experiment=experiment, class_weight=class_weight)
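#----------------------------------------------------------------------
#Illustrative sketch only: calc_class_weight() is the project's own
#helper, so this is not its implementation -- just the standard
#inverse-frequency weighting idea it presumably resembles, shown on a
#toy label list.
#----------------------------------------------------------------------
import pandas as pd

toy_labels = pd.Series(["ACRU", "ACRU", "ACRU", "PIPA", "QULA"])
counts = toy_labels.value_counts()

#rarer classes get proportionally larger weights in the cross entropy loss
class_weight = {label: counts.max() / n for label, n in counts.items()}
#e.g. {"ACRU": 1.0, "PIPA": 3.0, "QULA": 3.0}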
from DeepTreeAttention.generators import neighbors

sleep(randint(0, 20))
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
save_dir = "{}/{}".format("/orange/idtrees-collab/DeepTreeAttention/snapshots/", timestamp)
os.mkdir(save_dir)

experiment = Experiment(project_name="neontrees", workspace="bw4sz")
experiment.add_tag("Cleaning")

#Create output folder
experiment.log_parameter("timestamp", timestamp)
experiment.log_parameter("log_dir", save_dir)

#Create a class and run
model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml", log_dir=save_dir)
model.create()
model.ensemble_model = tfk.models.load_model(
    "{}/Ensemble.h5".format(model.config["neighbors"]["model_dir"]),
    custom_objects={"WeightedSum": WeightedSum})

hyperspectral_pool = glob.glob(model.config["hyperspectral_sensor_pool"], recursive=True)

#Load field data
#repo root, two levels above this script (as in the sibling scripts)
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
train = gpd.read_file("{}/data/processed/train.shp".format(ROOT))
test = gpd.read_file("{}/data/processed/test.shp".format(ROOT))

site_classes_file = "{}/data/processed/site_class_labels.csv".format(ROOT)
site_classdf = pd.read_csv(site_classes_file)
site_label_dict = site_classdf.set_index("siteID").label.to_dict()

domain_classes_file = "{}/data/processed/domain_class_labels.csv".format(ROOT)
#Linear metadata model for testing purposes
from comet_ml import Experiment
import tensorflow as tf
from DeepTreeAttention.trees import AttentionModel
from DeepTreeAttention.models import metadata
from DeepTreeAttention.callbacks import callbacks
import pandas as pd

model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml")
model.create()

#Log config
experiment = Experiment(project_name="neontrees", workspace="bw4sz")
experiment.log_parameters(model.config["train"])
experiment.log_parameters(model.config["evaluation"])
experiment.log_parameters(model.config["predict"])
experiment.add_tag("HSI")

##Train
#Train see config.yml for tfrecords path with weighted classes in cross entropy
model.read_data()
class_weight = model.calc_class_weight()

##Train subnetwork
experiment.log_parameter("Train subnetworks", True)
with experiment.context_manager("HSI_spatial_subnetwork"):
    print("Train HSI spatial subnetwork")
    model.read_data(mode="HSI_submodel")
    model.train(
        submodel="spatial",
        sensor="hyperspectral",
        class_weight=[class_weight, class_weight, class_weight],
        experiment=experiment)
#Generate tfrecords
from DeepTreeAttention.trees import AttentionModel
from DeepTreeAttention.generators import boxes
from DeepTreeAttention.utils.start_cluster import start
from DeepTreeAttention.utils.paths import lookup_and_convert

from distributed import wait
import glob
import os

att = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml")

#get root dir full path
client = start(cpus=10, mem_size="5GB")

#Generate training data
train_tfrecords = []
shapefiles = glob.glob(os.path.join("/orange/idtrees-collab/DeepTreeAttention/WeakLabels/", "*.shp"))
for shapefile in shapefiles:
    sensor_path = lookup_and_convert(
        shapefile,
        rgb_pool=att.config["train"]["rgb_sensor_pool"],
        hyperspectral_pool=att.config["train"]["hyperspectral_sensor_pool"],
        savedir=att.config["hyperspectral_tif_dir"])
    future = client.submit(att.generate,
                           shapefile=shapefile,
                           sensor_path=sensor_path,
                           chunk_size=10000,
                           train=True)
    train_tfrecords.append(future)

wait(train_tfrecords)
for x in train_tfrecords:
    x.result()
if __name__ == "__main__": experiment = Experiment(project_name="neontrees", workspace="bw4sz") experiment.add_tag("Train") #Create output folder #Sleep for a moment to allow queries to build up in SLURM queue sleep(randint(0, 10)) timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") save_dir = "{}/{}".format( "/orange/idtrees-collab/DeepTreeAttention/snapshots/", timestamp) os.mkdir(save_dir) experiment.log_parameter("timestamp", timestamp) #Create a class and run model = AttentionModel( config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml") model.create() #Log config experiment.log_parameters(model.config["train"]) experiment.log_parameters(model.config["evaluation"]) experiment.log_parameters(model.config["predict"]) experiment.log_parameters(model.config["train"]["ensemble"]) ##Train #Train see config.yml for tfrecords path with weighted classes in cross entropy model.read_data() class_weight = model.calc_class_weight() #Load from file and compile or train new models if model.config["train"]["checkpoint_dir"] is not None: