# Example no. 1
def LoadDatasetFromJson(dataset_json, model_name=""):
	"""Build a DataSet tool from a dataset descriptor dict.

	Args:
		dataset_json: dict describing the dataset (keys used here:
			"ground_truth_csv_path", "labels", "default_training_allocation_path").
		model_name: reserved for loading a model-specific training split;
			currently unused (see TODO below).

	Returns:
		(dataset_tool, label_names) — the configured DataSet instance and the
		sorted-order list of every label name in the descriptor.

	Relies on module-level `datasets_path`, `os` and `DataSet`.
	"""
	file_path = dataset_json["ground_truth_csv_path"]
	image_url_column = "image_path"
	ground_truth_column = "label"
	# gets all labels in dataset. To use a subset of labels, build a list manually
	label_names = [label["label"] for label in dataset_json["labels"]]

	### instantiate dataset tool
	csv_path = os.path.join(datasets_path, "dataset_csvs", file_path)
	dataset_images_dir_path = os.path.join(datasets_path, "dataset_images")
	dataset_tool = DataSet(csv_path, image_url_column, ground_truth_column,
						   explicit_path_suffix=dataset_images_dir_path)  #instantiates a dataset tool
	#creates an organised list of dataset observations, evenly split between labels
	dataset_tool.CreateLiveDataSet(dataset_max_size=-1, even_examples=True,
								   y_labels_to_use=label_names)

	# TODO: when model_name is given, load that model's own training split.
	# (The original code had identical if/else branches keyed on model_name;
	# they are deduplicated here — behavior is unchanged.)
	training_split_file = dataset_json["default_training_allocation_path"]
	training_split_file_path = os.path.join(datasets_path, "dataset_csvs",
											training_split_file)
	dataset_tool.ProduceDataFromTrainingSplitFile(
		training_split_file_path, explicit_path_suffix=dataset_images_dir_path)

	#Code to create new split if required
	#dataset_tool.SplitLiveData(train_ratio=0.8,validation_ratio=0.1,test_ratio=0.1) #splits the live dataset examples in to train, validation and test sets

	return dataset_tool, label_names
    def LoadFrameworkDataset(self,
                             dataset_name,
                             load_split_if_available=True,
                             train_ratio=0.8,
                             validation_ratio=0.1,
                             test_ratio=0.1):
        """Locate *dataset_name* in the registry json and build its DataSet tool.

        Args:
            dataset_name: "dataset_name" value to look up in
                self.datasets_json["datasets"] (IndexError if absent).
            load_split_if_available: reuse the recorded train/val/test
                allocation when the descriptor provides one.
            train_ratio / validation_ratio / test_ratio: split proportions
                used only when a new split must be created.

        Returns:
            (dataset_json, dataset_tool) — the descriptor dict and the
            configured DataSet instance.
        """
        matches = [d for d in self.datasets_json["datasets"]
                   if d["dataset_name"] == dataset_name]
        dataset_json = matches[0]  # IndexError when the name is unknown

        ### gather required information about the dataset
        load_split = (load_split_if_available
                      and "default_training_allocation_path" in dataset_json)
        if load_split:
            file_path = dataset_json["default_training_allocation_path"]
        else:
            file_path = dataset_json["ground_truth_csv_path"]
            print("new training split will be created")

        # optional per-channel normalisation stats; None when absent
        mean = dataset_json.get("mean")
        std = dataset_json.get("std")

        ### instantiate dataset tool
        csv_path = os.path.join(self.datasets_path, "dataset_csvs", file_path)
        dataset_images_dir_path = os.path.join(self.datasets_path,
                                               "dataset_images")

        dataset_tool = DataSet(csv_path,
                               "image_path",
                               "label",
                               explicit_path_suffix=dataset_images_dir_path,
                               mean=mean,
                               std=std)  #instantiates a dataset tool

        if load_split:
            # reuse the allocation stored alongside the csv
            dataset_tool.ProduceDataFromTrainingSplitFile(
                csv_path, explicit_path_suffix=dataset_images_dir_path)
        else:
            #splits the live dataset examples in to train, validation and test sets
            dataset_tool.SplitLiveData(train_ratio=train_ratio,
                                       validation_ratio=validation_ratio,
                                       test_ratio=test_ratio)
            # persist the fresh allocation next to the source csv
            dataset_tool.OutputTrainingSplitAllocation(
                csv_path.replace(".csv", "_split.csv"))

        return dataset_json, dataset_tool
# Example no. 3
    label["label"] for label in dataset_json["labels"]
]  # gets all labels in dataset. To use a subset of labels, build a list manually
# NOTE(review): fragment of a scraped example — `label_names` is built on the
# truncated lines just above; `dataset_json`, `file_path`, `load_split` and
# `datasets_path` are presumably defined earlier in the original script.
label_names.sort()
print(label_names)

# image geometry as recorded in the dataset's json descriptor
input_image_height = dataset_json["image_y"]
input_image_width = dataset_json["image_x"]
input_image_channels = dataset_json["image_channels"]

### instantiate dataset tool
csv_path = os.path.join(datasets_path, "dataset_csvs", file_path)
dataset_images_dir_path = os.path.join(datasets_path, "dataset_images")

dataset_tool = DataSet(
    csv_path,
    image_url_column,
    ground_truth_column,
    explicit_path_suffix=dataset_images_dir_path)  #instantiates a dataset tool

dataset_tool.CreateLiveDataSet(
    dataset_max_size=-1, even_examples=True, y_labels_to_use=label_names
)  #creates an organised list of dataset observations, evenly split between labels

# reuse a saved train/validation/test allocation when one exists,
# otherwise create a fresh 80/10/10 split
if (load_split):
    dataset_tool.ProduceDataFromTrainingSplitFile(
        csv_path, explicit_path_suffix=dataset_images_dir_path)
else:
    dataset_tool.SplitLiveData(
        train_ratio=0.8, validation_ratio=0.1, test_ratio=0.1
    )  #splits the live dataset examples in to train, validation and test sets
import os

from DatasetClass import DataSet

dataset_name = "imagenet_vehicles_birds_10"

file_path = os.path.join("dataset_csvs", dataset_name + ".csv")
image_url_column = "image_path"
ground_truth_column = "label"

dataset_tool = DataSet(file_path, image_url_column, ground_truth_column)

image_dir = os.path.join("dataset_images", dataset_name)
output_dir = os.path.join("dataset_images", dataset_name + "_resized")

target_width = 128
target_height = 128

#cropping
#original image size
width = 128  #y
height = 128  #x

crop_images_before_resize = False

if (crop_images_before_resize):

    ## calculate crop around centre
    mid_x = height / 2
    mid_y = width / 2
    def GetBackgroundSamples(self, dataset_name):
        """Return (and cache) a 1024-image training batch for *dataset_name*.

        On a cache hit, returns the previously produced batch images from
        self.dataset_tool_dict (NOTE: despite the name, the dict stores the
        batch `train_x`, not a DataSet tool). On a miss, locates the project
        base directory, loads the dataset registry json, builds a DataSet
        tool, then draws and caches an even, one-hot-labelled training batch.

        Relies on module-level `os`, `sys` and `json` imports.
        """
        if (dataset_name in self.dataset_tool_dict):
            return self.dataset_tool_dict[dataset_name]
        else:

            def GetProjectExplicitBase(base_dir_name="p5_afm_2018_demo"):
                """Walk cwd components from the end; return the path up to and
                including the last-checked match of *base_dir_name*.

                Raises IOError when cwd does not contain base_dir_name.
                """
                cwd = os.getcwd()
                split_cwd = cwd.split("/")

                base_path_list = []
                for i in range(1, len(split_cwd)):
                    if (split_cwd[-i] == base_dir_name):
                        # BUGFIX: the original used split_cwd[:-i + 1], which
                        # for i == 1 is split_cwd[:0] — an empty slice — so the
                        # lookup failed whenever cwd ended exactly at the base
                        # directory. len(split_cwd) - i + 1 includes the
                        # matched component in every case.
                        base_path_list = split_cwd[:len(split_cwd) - i + 1]

                if (base_path_list == []):
                    raise IOError(
                        'base project path could not be constructed. Are you running within: '
                        + base_dir_name)

                base_dir_path = "/".join(base_path_list)

                return base_dir_path

            base_dir = GetProjectExplicitBase(base_dir_name="p5_afm_2018_demo")

            #add dataset folder to sys path to allow for easy import
            datasets_path = os.path.join(base_dir, "datasets")
            sys.path.append(datasets_path)
            ###

            #import dataset tool (importable only after the sys.path append)
            from DatasetClass import DataSet

            #### load dataset json
            data_json_path = os.path.join(datasets_path, "datasets.json")

            datasets_json = None
            with open(data_json_path, "r") as f:
                datasets_json = json.load(f)

            ### get dataset details (IndexError when the name is unknown)
            dataset_json = [
                dataset for dataset in datasets_json["datasets"]
                if dataset["dataset_name"] == dataset_name
            ][0]

            ### gather required information about the dataset
            if ("default_training_allocation_path" in dataset_json.keys()):
                file_path = dataset_json["default_training_allocation_path"]
                load_split = True
            else:
                file_path = dataset_json["ground_truth_csv_path"]
                load_split = False

            image_url_column = "image_path"
            ground_truth_column = "label"
            label_names = [
                label["label"] for label in dataset_json["labels"]
            ]  # gets all labels in dataset. To use a subset of labels, build a list manually
            label_names.sort()
            print(label_names)

            ### instantiate dataset tool
            csv_path = os.path.join(datasets_path, "dataset_csvs", file_path)
            print(csv_path)
            dataset_images_dir_path = os.path.join(datasets_path,
                                                   "dataset_images")

            dataset_tool = DataSet(csv_path,
                                   image_url_column,
                                   ground_truth_column,
                                   explicit_path_suffix=dataset_images_dir_path
                                   )  #instantiates a dataset tool

            dataset_tool.CreateLiveDataSet(
                dataset_max_size=-1,
                even_examples=True,
                y_labels_to_use=label_names
            )  #creates an organised list of dataset observations, evenly split between labels

            if (load_split):
                dataset_tool.ProduceDataFromTrainingSplitFile(
                    csv_path, explicit_path_suffix=dataset_images_dir_path)
            else:
                dataset_tool.SplitLiveData(
                    train_ratio=0.8, validation_ratio=0.1, test_ratio=0.1
                )  #splits the live dataset examples in to train, validation and test sets

        source = "train"
        train_x, train_y = dataset_tool.GetBatch(batch_size=1024,
                                                 even_examples=True,
                                                 y_labels_to_use=label_names,
                                                 split_batch=True,
                                                 split_one_hot=True,
                                                 batch_source=source)

        # cache the batch images so repeat calls skip dataset construction
        self.dataset_tool_dict[dataset_name] = train_x

        return train_x
# NOTE(review): script fragment — `dataset_json`, `datasets_path` and
# `file_path` are presumably defined earlier in the original script.
label_names = [
    label["label"] for label in dataset_json["labels"]
]  # gets all labels in dataset. To use a subset of labels, build a list manually
label_names.sort()
print(label_names)

# image geometry as recorded in the dataset's json descriptor
input_image_height = dataset_json["image_y"]
input_image_width = dataset_json["image_x"]
input_image_channels = dataset_json["image_channels"]

### instantiate dataset tool
csv_path = os.path.join(datasets_path, "dataset_csvs", file_path)
dataset_images_dir_path = os.path.join(datasets_path, "dataset_images")
dataset_tool = DataSet(
    csv_path,
    image_url_column,
    ground_truth_column,
    explicit_path_suffix=dataset_images_dir_path)  #instantiates a dataset tool
dataset_tool.CreateLiveDataSet(
    dataset_max_size=-1, even_examples=True, y_labels_to_use=label_names
)  #creates an organised list of dataset observations, evenly split between labels

#load exisiting split
training_split_file = dataset_json["default_training_allocation_path"]
training_split_file_path = os.path.join(datasets_path, "dataset_csvs",
                                        training_split_file)
dataset_tool.ProduceDataFromTrainingSplitFile(
    training_split_file_path, explicit_path_suffix=dataset_images_dir_path)

#CREATE NEW SPLIT
#dataset_tool.SplitLiveData(train_ratio=0.8,validation_ratio=0.1,test_ratio=0.1) #splits the live dataset examples in to train, validation and test sets
# Example no. 7
### gather required information about the dataset
# NOTE(review): script fragment — `dataset_json` and `datasets_path` are
# presumably defined earlier in the original script.
file_path = dataset_json["ground_truth_csv_path"]
image_url_column = "image_path"
ground_truth_column = "label"
label_names = [label["label"] for label in dataset_json["labels"]] # gets all labels in dataset. To use a subset of labels, build a list manually
label_names.sort()
print(label_names)

# image geometry as recorded in the dataset's json descriptor
input_image_height = dataset_json["image_y"]
input_image_width = dataset_json["image_x"]
input_image_channels = dataset_json["image_channels"]

### instantiate dataset tool
csv_path = os.path.join(datasets_path,"dataset_csvs",file_path)
dataset_images_dir_path =  os.path.join(datasets_path,"dataset_images")
dataset_tool = DataSet(csv_path,image_url_column,ground_truth_column,explicit_path_suffix =dataset_images_dir_path) #instantiates a dataset tool
dataset_tool.CreateLiveDataSet(dataset_max_size = -1, even_examples=True, y_labels_to_use=label_names) #creates an organised list of dataset observations, evenly split between labels
dataset_tool.SplitLiveData(train_ratio=0.8,validation_ratio=0.1,test_ratio=0.1) #splits the live dataset examples in to train, validation and test sets


### get example batch and display an image
# set True to pull a sample batch from the chosen source below
display_example_image = False

if(display_example_image):
    ##select the source for the example
    # source = "full"
    # source = "train"
    # source = "validation"
    source = "test"

    x, y = dataset_tool.GetBatch(batch_size = 128,even_examples=True, y_labels_to_use=label_names, split_batch = True,split_one_hot = True, batch_source = source)