import os

import numpy as np
import pandas as pd
from PIL import Image
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler


class Scaler(object):
    '''Fits a scikit-learn scaler over a csv of training images, then scales single images.'''

    def __init__(self, root_dir, train_csv, resize, scaler="std"):
        if scaler == "minmax":
            self.online_scaler = MinMaxScaler()
        elif scaler == "std":
            self.online_scaler = StandardScaler()
        elif scaler == "robust":
            # note: RobustScaler has no partial_fit in scikit-learn, so fit_data will fail for it
            self.online_scaler = RobustScaler()
        else:
            raise ValueError("unknown scaler: %s" % scaler)
        self.root_dir = root_dir
        self.train_csv = train_csv
        self.resize = resize
        # fit training data
        self.fit_data(self.train_csv, self.root_dir, self.online_scaler)

    def __call__(self, image):
        img = np.asarray(image, dtype=np.float64)
        x, y = img.shape
        img = img.reshape(1, x * y)
        img = self.online_scaler.transform(img)
        img = img.reshape(x, y, 1)
        # clip before the uint8 cast, otherwise out-of-range values wrap around
        img = np.uint8(np.clip(img * 255, 0, 255))
        return img

    def fit_data(self, csv_path, root_dir, online_scaler):
        '''Incrementally fits the scaler over every training image listed in the csv.'''
        print("======> fitting training data")
        csv_data = pd.read_csv(csv_path)
        for row in csv_data.itertuples():
            img_name = os.path.join(root_dir, row[1])  # first csv column holds the filename
            image = Image.open(img_name)
            image = image.resize((self.resize, self.resize))
            image = np.asarray(image, dtype=np.float64)
            x, y = image.shape  # assumes single-channel (grayscale) images
            image = image.reshape(1, x * y)
            online_scaler.partial_fit(image)

    # reshape returns a view where possible, so the original data array is not copied
    def save_images(self, path, data, img_names, labels):
        '''Writes scaled images to disk, grouped into one directory per label.'''
        print("======> saving data")
        data = data.reshape((-1, 224, 224))  # one 224x224 image per sample
        for im, img_name, label in zip(data, img_names, labels):
            label_dir = os.path.join(path, str(label))
            if not os.path.exists(label_dir):
                os.makedirs(label_dir)
            # multiply by 255 before saving so the image can be used by ToTensor() in pytorch
            im = Image.fromarray(np.uint8(np.clip(im * 255, 0, 255)))
            im.save(os.path.join(label_dir, img_name))
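# A minimal usage sketch for Scaler, under stated assumptions: "data/images",
# "data/train.csv" and "example.png" are hypothetical paths, and the csv's
# first column is assumed to hold grayscale image filenames relative to
# root_dir. Because __call__ takes a PIL image and returns an array, Scaler
# can be dropped into a torchvision transform pipeline ahead of ToTensor().
scaler_transform = Scaler(root_dir="data/images", train_csv="data/train.csv",
                          resize=224, scaler="std")
sample = Image.open("data/images/example.png").resize((224, 224))
normalized = scaler_transform(sample)  # (224, 224, 1) uint8 array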
import os
import pickle

from sklearn.base import BaseEstimator, TransformerMixin

# TEMP_DIR is assumed to be defined elsewhere in the project; a plain default
# keeps this snippet self-contained
TEMP_DIR = "temp"


class ListScaler(BaseEstimator, TransformerMixin):
    '''
    Given a list of point clouds, incrementally fits the chosen scikit-learn
    scaler across the list (via partial_fit), then transforms each cloud with it.

    Parameters
    ----------
    scaler : str, default "standard"
        One of "standard", "normal", "min_max", "max_abs", "robust" or "log"
        ("log" is not implemented yet).
    '''

    def __init__(self, scaler="standard"):
        assert scaler in ["standard", "normal", "min_max", "max_abs", "robust", "log"]
        if scaler == "log":
            raise NotImplementedError("log scaling is not implemented yet")
        if scaler == "standard":
            from sklearn.preprocessing import StandardScaler
            self.scaler = StandardScaler()
        elif scaler == "normal":
            from sklearn.preprocessing import Normalizer
            self.scaler = Normalizer()
        elif scaler == "min_max":
            from sklearn.preprocessing import MinMaxScaler
            self.scaler = MinMaxScaler()
        elif scaler == "max_abs":
            from sklearn.preprocessing import MaxAbsScaler
            self.scaler = MaxAbsScaler()
        elif scaler == "robust":
            from sklearn.preprocessing import RobustScaler
            self.scaler = RobustScaler()

    def fit(self, X, y=None):
        '''Partially fits the scaler over every segment in the list.'''
        segments = X
        for segment in segments:
            # note: only StandardScaler, MinMaxScaler and MaxAbsScaler implement
            # partial_fit in scikit-learn; "normal" and "robust" will raise here
            self.scaler.partial_fit(segment)
        # in case of batch learning, the scaler should be fit to the whole
        # dataset, so persist it for partial_train() to resume from
        with open(os.path.join(TEMP_DIR, "total_scaler.pkl"), "wb") as f:  # pickling
            pickle.dump(self.scaler, f)
        return self

    def partial_train(self, X):
        '''In case of batch learning, continues fitting the pickled scaler on a new batch.'''
        segments = X
        # load the persisted scaler
        with open(os.path.join(TEMP_DIR, "total_scaler.pkl"), "rb") as f:  # unpickling
            total_scaler = pickle.load(f)
        # fit
        for segment in segments:
            total_scaler.partial_fit(segment)
        # save
        with open(os.path.join(TEMP_DIR, "total_scaler.pkl"), "wb") as f:  # pickling
            pickle.dump(total_scaler, f)
        return self

    def transform(self, X):
        '''Transforms every segment with the in-memory scaler.'''
        segments = X
        new_segments = []
        for segment in segments:
            new_segments.append(self.scaler.transform(segment))
        return new_segments

    def total_transform(self, X):
        '''Transforms every segment with the pickled (batch-fitted) scaler.'''
        segments = X
        # load the persisted scaler
        with open(os.path.join(TEMP_DIR, "total_scaler.pkl"), "rb") as f:  # unpickling
            total_scaler = pickle.load(f)
        # transform
        new_segments = []
        for segment in segments:
            new_segments.append(total_scaler.transform(segment))
        return new_segments
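# A minimal usage sketch for ListScaler on synthetic data: the random (100, 3)
# point clouds below are purely illustrative. fit()/transform() use the
# in-memory scaler, while partial_train()/total_transform() round-trip through
# the pickle in TEMP_DIR so fitting can resume across batches.
import numpy as np

os.makedirs(TEMP_DIR, exist_ok=True)
clouds = [np.random.rand(100, 3) for _ in range(5)]  # five clouds of 100 xyz points

ls = ListScaler(scaler="standard").fit(clouds)
scaled = ls.transform(clouds)               # list of five scaled (100, 3) arrays

ls.partial_train([np.random.rand(100, 3)])  # keep fitting on a later batch
scaled_total = ls.total_transform(clouds)   # transform with the batch-fitted scaler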