def __init__(self, args):
    """Generate the synthetic image dataset and build a train/test split.

    Each of the 100 iterations generates one positive and one negative
    ``10 x 10`` image, so 200 images are collected in total. The collected
    images and instance labels are passed through ``ravel_image``, the
    whole dataset is shuffled with the configured seed, and finally
    ``random_shuffle`` is called to create the training/testing attributes.

    :param args: configuration object; only ``args.rs`` is read, and it is
        used as the seed for image generation and for shuffling.
    """
    pair_count = 100  # iterations; each one adds a positive and a negative image
    self.h = 10
    self.w = 10
    self.rs = args.rs
    rng = np.random.RandomState(seed=self.rs)

    self.images = []
    self.bag_labels = []
    self.instance_labels = []

    def keep(sample):
        """Store one generated (image, instance label, bag label) triple."""
        image, per_instance, per_bag = sample
        self.images.append(image)
        self.bag_labels.append(per_bag)
        self.instance_labels.append(per_instance)

    for _ in range(pair_count):
        keep(generate_positive_image(self.h, self.w, rng))
        # NOTE: the author left an alternative for the "MIMN definition" that
        # omits the trailing 0 argument:
        #     generate_negative_image(self.h, self.w, rng)
        keep(generate_negative_image(self.h, self.w, rng, 0))

    self.features = ravel_image(self.images)
    self.instance_labels = ravel_image(self.instance_labels)

    # Shuffle bags, bag labels and instance labels together so they stay aligned.
    shuffled = shuffle_dataset(
        self.features, self.bag_labels, self.rs, self.instance_labels)
    self.features, self.bag_labels, self.instance_labels = shuffled

    self.random_shuffle()
def random_shuffle(self):
    """Reshuffle the dataset and rebuild the train/test split.

    Reads ``self.features``, ``self.bag_labels`` and ``self.rs``; writes
    ``self.training_data``, ``self.testing_data``, ``self.training_labels``
    and ``self.testing_labels``.

    When the instance carries ``instance_labels`` they are shuffled together
    with the features and bag labels. Previously only the features and bag
    labels were re-shuffled here, which left ``instance_labels`` in the old
    order and therefore misaligned with ``features``.
    """
    instance_labels = getattr(self, 'instance_labels', None)
    if instance_labels is None:
        self.features, self.bag_labels = shuffle_dataset(
            self.features, self.bag_labels, self.rs)
    else:
        # Same 4-argument form that __init__ uses: the instance labels are
        # permuted in lock-step with their bags.
        self.features, self.bag_labels, self.instance_labels = shuffle_dataset(
            self.features, self.bag_labels, self.rs, instance_labels)

    # NOTE(review): no random_state is passed here. If this is scikit-learn's
    # train_test_split, the split differs between runs even with a fixed
    # self.rs -- confirm whether that is intended.
    x_train, x_test, y_train, y_test = train_test_split(
        self.features, self.bag_labels)
    self.training_data = x_train
    self.testing_data = x_test
    self.training_labels = y_train
    self.testing_labels = y_test
def _list_pickle_files(directory):
    """Return the sorted paths of all ``.pkl`` files directly inside *directory*."""
    # os.listdir returns entries in arbitrary order; sorting makes the final
    # dataset depend only on the files present and the shuffle seed.
    return sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.endswith('.pkl')
    )


def _load_bags(paths, negate_label, feature_dim):
    """Load the bags in *paths*, keeping those whose instances have *feature_dim* features.

    Returns two parallel lists: the instance arrays and the bag labels
    (negated when *negate_label* is true).
    """
    bags = []
    labels = []
    for path in paths:
        with open(path, 'rb') as handle:
            # NOTE: pickle.load can execute arbitrary code from the file; only
            # point this loader at trusted, locally generated descriptors.
            record = pickle.load(handle)
        instances = record['instances']
        # Skip empty bags (nothing to measure) and bags with a different
        # descriptor length.
        if len(instances) == 0 or len(instances[0]) != feature_dim:
            continue
        bags.append(np.array(instances))
        stored_label = record['bag_label']
        labels.append(-stored_label if negate_label else stored_label)
    return bags, labels


def get_dataset(
        random_seed,
        data_root="/home/dub/python/multiple_instance_learning/data/synthetic",
        feature_dim=16):
    """
    Fetches dataset from precalculated descriptors.

    Pickled bags are read from ``<data_root>/{training,testing}/{tumor,normal}``.
    Each pickle must provide the keys ``'instances'`` and ``'bag_label'``.
    Training and testing bags are merged into one dataset, which is then
    shuffled with ``shuffle_dataset``.

    :param random_seed: seed forwarded to ``shuffle_dataset``
    :param data_root: directory containing the ``training`` and ``testing``
        sub-directories; defaults to the original hard-coded location
    :param feature_dim: required length of the first instance of a bag; bags
        that are empty or have a different length are skipped
    :return: features, bag_labels
    :raises OSError: if one of the four data directories cannot be listed
    """
    # (split, class directory, negate the stored label), in the order in
    # which the bags are appended before shuffling.
    # NOTE(review): the testing split negates the label of *tumor* bags and
    # keeps it for *normal* bags, the mirror image of the training split.
    # This is kept exactly as it was; confirm against how 'bag_label' is
    # stored in the pickles.
    sources = (
        ("training", "tumor", False),
        ("training", "normal", True),
        ("testing", "tumor", True),
        ("testing", "normal", False),
    )

    features = []  # list for all bags (each bag is an array of instances)
    bag_labels = []  # list for all bag labels
    for split, class_dir, negate_label in sources:
        paths = _list_pickle_files(os.path.join(data_root, split, class_dir))
        bags, labels = _load_bags(paths, negate_label, feature_dim)
        features.extend(bags)
        bag_labels.extend(labels)

    # The original passed and assigned the undefined name `bag_label` here,
    # which raised UnboundLocalError and would have returned unshuffled labels.
    features, bag_labels = shuffle_dataset(features, bag_labels, random_seed)
    return features, bag_labels