def get_feature_map(idx):
    """Feed one MNIST training image through ``resnet18Conv5`` on the GPU.

    The image is resized to 224x224, converted to a tensor and normalised
    with the mean / std of the raw training pixels (scaled by 1/255).

    Args:
        idx: Index of the sample inside the MNIST training split.

    Returns:
        The model output for that single image, detached and converted to
        a NumPy array on the CPU.
    """
    model = resnet18Conv5().cuda()

    # The raw pixel tensor is loaded only to derive normalisation statistics.
    mnist = MNIST(download=True, train=True, root=".").train_data.float()
    preprocessing = Compose([
        Resize((224, 224)),
        ToTensor(),
        Normalize((mnist.mean() / 255, ), (mnist.std() / 255, )),
    ])
    mnist_dataset = MNIST(download=True, root=".", transform=preprocessing,
                          train=True)
    print(len(mnist))

    image, label = mnist_dataset[idx]
    print("data shape = ", image.shape)  # 1x224x224 per the original note
    print("data label = ", label)  # digit class of the sample

    # Move to the GPU and add a leading batch dimension before the forward pass.
    batch = image.cuda().unsqueeze(0)
    features = model(batch).detach().cpu().numpy()
    print(features.shape)
    return features
def get_data_loaders(train_batch_size=256, val_batch_size=64):
    """Build MNIST train / validation loaders yielding 3-channel 224x224 tensors.

    MNIST images are single-channel; ``Grayscale(3)`` replicates that channel
    three times so the tensors look RGB-like (the content is still gray).

    Args:
        train_batch_size: Batch size of the (shuffled) training loader.
        val_batch_size: Batch size of the (unshuffled) validation loader.

    Returns:
        A ``(train_loader, val_loader)`` tuple of ``DataLoader`` objects.
    """
    # Normalisation statistics come from the raw training pixels (0..255),
    # rescaled to the 0..1 range produced by ``ToTensor``.
    mnist = MNIST(download=True, train=True, root=".").train_data.float()
    mean = mnist.mean() / 255
    std = mnist.std() / 255

    data_transform = Compose([
        Resize((224, 224)),
        Grayscale(3),
        ToTensor(),
        # One mean/std entry per channel: the tensor has three channels at
        # this point, so the statistics are spelled out for each of them
        # instead of relying on a single value being applied to all channels.
        Normalize((mean, mean, mean), (std, std, std)),
    ])

    train_loader = DataLoader(
        MNIST(download=True, root=".", transform=data_transform, train=True),
        batch_size=train_batch_size,
        shuffle=True)
    val_loader = DataLoader(
        MNIST(download=True, root=".", transform=data_transform, train=False),
        batch_size=val_batch_size,
        shuffle=False)
    return train_loader, val_loader
def get_data_loaders(train_batch_size, val_batch_size):
    """Build train / validation loaders backed by ``MnistDataset``.

    The previous version also loaded the torchvision MNIST training tensor
    and assembled a ``Compose`` transform, but neither was passed to
    ``MnistDataset`` or used anywhere else, so that dead work (and its
    requirement that the torchvision MNIST files exist under ``"."``) has
    been removed.

    Args:
        train_batch_size: Batch size of the (shuffled) training loader.
        val_batch_size: Batch size of the (unshuffled) validation loader.

    Returns:
        A ``(train_loader, val_loader)`` tuple of ``DataLoader`` objects.
    """
    train_loader = DataLoader(MnistDataset(root_dir=''),
                              batch_size=train_batch_size,
                              shuffle=True)
    val_loader = DataLoader(MnistDataset(root_dir='', training=False),
                            batch_size=val_batch_size,
                            shuffle=False)
    return train_loader, val_loader
def get_feature_map(idx):
    """Return the ``resnet18fc1000`` output for one MNIST training image.

    Args:
        idx: Index of the sample inside the MNIST training split.

    Returns:
        The raw model output for a batch containing just that image
        (whatever ``resnet18fc1000`` returns; it is not detached or moved
        off the GPU here).
    """
    model = resnet18fc1000().cuda()

    # Raw training pixels are loaded only to derive the normalisation stats.
    mnist = MNIST(download=True, train=True, root=".").train_data.float()
    steps = [
        Resize((224, 224)),
        ToTensor(),
        Normalize((mnist.mean() / 255, ), (mnist.std() / 255, )),
    ]
    mnist_dataset = MNIST(download=True, root=".", transform=Compose(steps),
                          train=True)

    sample = mnist_dataset[idx]
    # sample[0] is the transformed image; add a batch axis after moving to GPU.
    batch = sample[0].cuda().unsqueeze(0)
    return model(batch)
def get_data_loaders(train_batch_size, val_batch_size):
    """Build augmented-train / plain-validation MNIST loaders at 48x48.

    Training images receive a random affine perturbation before being
    resized, converted to tensors and normalised; validation images skip
    the augmentation. Both loaders use four worker processes.

    Args:
        train_batch_size: Batch size of the (shuffled) training loader.
        val_batch_size: Batch size of the (unshuffled) validation loader.

    Returns:
        A ``(train_loader, val_loader)`` tuple of ``DataLoader`` objects.
    """
    # NOTE(review): statistics are read from root "./raw/" while the datasets
    # below use root "." - kept exactly as in the original.
    mnist = MNIST(download=False, train=True, root="./raw/").train_data.float()

    def resize_and_normalise():
        # Fresh transform instances on every call, shared by both pipelines.
        return [
            Resize((48, 48)),
            ToTensor(),
            Normalize((mnist.mean() / 255, ), (mnist.std() / 255, )),
        ]

    augmentation = RandomAffine(degrees=30,
                                translate=(0.15, 0.15),
                                scale=(0.9, 1.1),
                                shear=10)
    train_data_transform = Compose([augmentation] + resize_and_normalise())
    valid_data_transform = Compose(resize_and_normalise())

    train_set = MNIST(download=False, root=".",
                      transform=train_data_transform, train=True)
    train_loader = DataLoader(train_set,
                              batch_size=train_batch_size,
                              shuffle=True,
                              num_workers=4)

    val_set = MNIST(download=False, root=".",
                    transform=valid_data_transform, train=False)
    val_loader = DataLoader(val_set,
                            batch_size=val_batch_size,
                            shuffle=False,
                            num_workers=4)
    return train_loader, val_loader
# Train a BioLinear layer on mean-centred, flattened MNIST digits while a
# LinearLayerVisualizer is refreshed after every step the trainer yields.
import torch
import matplotlib.pyplot as plt
from biolayer import BioLinear
from visualization import LinearLayerVisualizer
from torchvision.datasets import MNIST

MNIST_DIR = '~/DataSets/'

# Hyper-parameters.
Nc = 10      # num. of classes
N = 784      # Sample size
Nep = 300    # Number of epochs
Num = 100    # Batch size
eps0 = 2e-3  # Learning rate

# The hidden units are displayed in a Ky by Kx array.
Kx = 5
Ky = 5
hid = Kx * Ky  # number of hidden units that are displayed

# One row per image (28*28 pixels), with the per-pixel mean subtracted
# in place.
M = MNIST(MNIST_DIR, download=True).data.detach().view(-1, 28 * 28).float()
M.sub_(M.mean(0))

bio_linear = BioLinear(N, hid)
vis = LinearLayerVisualizer(bio_linear, as_heatmap=True)

try:
    # Each value yielded by train() triggers one visualiser refresh.
    for weight in bio_linear.train(M, batch_size=Num, epsilon=eps0):
        vis.update()
except KeyboardInterrupt:
    # Ctrl-C stops training; only then is the visualiser closed.
    vis.close()
import torch

# --- Dataset loading -------------------------------------------------------
from torchvision.datasets import MNIST
from resnet_vis import *
import os
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV

# NOTE: disabled snippet that selected the GPU with the most free memory.
#os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp')
#os.environ['CUDA_VISIBLE_DEVICES'] = str(np.argmax([int(x.split()[2]) for x in open('tmp', 'r').readlines()]))
#os.system('rm tmp')

# Raw training pixels, used only to derive the normalisation statistics.
mnist_train = MNIST(download=True, train=True, root=".").train_data.float()

# Resize to 224x224, convert to a tensor, then normalise with the training
# mean / std rescaled from 0..255 to 0..1.
data_transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize((mnist_train.mean() / 255, ), (mnist_train.std() / 255, )),
])

mnist_dataset_train = MNIST(download=True,
                            root=".",
                            transform=data_transform,
                            train=True)
mnist_dataset_test = MNIST(download=True,
                           train=False,
                           root=".",
                           transform=data_transform)

# --- ResNet-18 model used to obtain features -------------------------------
resnet_model = resnet18Conv5().cuda()

# --- Generate train features -----------------------------------------------
class MyModel:
    """Digit classifier built on a pre-trained ``MnistResNet``.

    Bundles the network, the tensor pre-processing applied before inference,
    and a few OpenCV helpers that clean up a photographed digit before it is
    classified.
    """

    def __init__(self):
        """Load the checkpoint and prepare the inference transforms.

        The checkpoint is looked up under ``./model/`` first and then in the
        current working directory.
        """
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = MnistResNet().to(self.device)

        # Only a missing / unreadable file triggers the fallback path; any
        # other failure (e.g. a corrupt or mismatching checkpoint) is
        # surfaced instead of being hidden by a bare ``except``.
        try:
            state_dict = torch.load("./model/mnist_model_46_36.pth",
                                    map_location="cpu")
        except OSError:
            state_dict = torch.load("mnist_model_46_36.pth",
                                    map_location="cpu")
        self.model.load_state_dict(state_dict)
        self.model.eval()

        # Raw training pixels, kept on the instance as before; they provide
        # the normalisation statistics below.
        self.mnist = MNIST(download=True, train=True,
                           root=".").train_data.float()
        self.test_transforms = Compose([
            Resize((46, 46)),
            ToTensor(),
            Normalize((self.mnist.mean() / 255, ),
                      (self.mnist.std() / 255, )),
        ])
        # NOTE: attribute name (including its spelling) is kept for
        # backward compatibility with existing users of the class.
        self.BINARY_THREHOLD = 180

    def finetune(self):
        """Disable gradient tracking on every model parameter and print the model."""
        for param in self.model.parameters():
            # The attribute is ``requires_grad``; the previous spelling
            # ``require_grad`` only created an unrelated attribute and left
            # every parameter trainable.
            param.requires_grad = False
        print(self.model)

    def predict(self, image):
        """Classify a single image.

        Args:
            image: Input accepted by ``self.test_transforms`` (the callers in
                this class pass a PIL image).

        Returns:
            Index of the largest value in the model output.
        """
        self.model.eval()
        with torch.no_grad():
            # Transform, then add the batch dimension expected by the model.
            image_tensor = self.test_transforms(image).float().unsqueeze(0)
            output = self.model(image_tensor.to(self.device))
        return output.detach().cpu().numpy().argmax()

    def image_smoothening(self, img):
        """Binarise ``img`` with a fixed threshold, Otsu, blur, then Otsu again.

        Args:
            img: Single-channel 8-bit image.

        Returns:
            The binarised image produced by the final Otsu threshold.
        """
        _, fixed = cv2.threshold(img, self.BINARY_THREHOLD, 255,
                                 cv2.THRESH_BINARY)
        _, otsu = cv2.threshold(fixed, 0, 255,
                                cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        blur = cv2.GaussianBlur(otsu, (1, 1), 0)
        _, smoothed = cv2.threshold(blur, 0, 255,
                                    cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return smoothed

    @staticmethod
    def _crop_central_region(img):
        """Keep rows and columns between 20% and 90% of each dimension."""
        h, w = img.shape[:2]
        # Axis 0 is rows (height) and axis 1 is columns (width). The earlier
        # code used the width for the row bounds and the height for the
        # column bounds, which mis-cropped every non-square image.
        return img[int(h * 0.2):int(h * 0.9), int(w * 0.2):int(w * 0.9)]

    def _clean_grayscale(self, img):
        """Crop a grayscale image and merge adaptive and global binarisations."""
        img = self._crop_central_region(img)
        filtered = cv2.adaptiveThreshold(img.astype(np.uint8), 255,
                                         cv2.ADAPTIVE_THRESH_MEAN_C,
                                         cv2.THRESH_BINARY, 9, 41)
        kernel = np.ones((1, 1), np.uint8)
        opening = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
        smoothed = self.image_smoothening(img)
        return cv2.bitwise_or(smoothed, closing)

    def remove_noise_and_smooth(self, file_name):
        """Read an image file as grayscale and return its cleaned, binarised crop.

        Args:
            file_name: Path of the image to read.

        Returns:
            The cleaned single-channel image.

        Raises:
            FileNotFoundError: If OpenCV could not read ``file_name``.
        """
        img = cv2.imread(file_name, 0)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(
                "could not read image: {!r}".format(file_name))
        return self._clean_grayscale(img)

    def remove_noise_and_smooth_numpy(self, image_np):
        """Same clean-up as ``remove_noise_and_smooth`` for an in-memory BGR image.

        Args:
            image_np: BGR image as a NumPy array.

        Returns:
            The cleaned single-channel image.
        """
        img = cv2.cvtColor(image_np, cv2.COLOR_BGR2GRAY)
        return self._clean_grayscale(img)

    def preprocess_image(self, image):
        """Blur, HSV-mask and invert a BGR image, returning it as grayscale.

        Args:
            image: BGR image as a NumPy array.

        Returns:
            The processed image converted to a single gray channel. (The
            earlier code computed this value; it is now returned explicitly
            so the method has a usable result.)
        """
        image = cv2.GaussianBlur(image, (7, 7), 0)
        image[image < 128] = 0

        # Keep only the pixels whose HSV values lie inside the given range.
        hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        lower_val = np.array([110, 25, 50])
        upper_val = np.array([255, 255, 255])
        mask = cv2.inRange(hsv, lower_val, upper_val)
        image = cv2.bitwise_and(image, image, mask=mask)

        # Invert the masked image.
        image = cv2.bitwise_not(image)
        im_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        return im_gray

    def main(self):
        """Clean a hard-coded sample file, save it as ``test.jpg`` and print the prediction."""
        file_in = "211851-1-2 copy.jpg"
        image = self.remove_noise_and_smooth(file_in)
        cv2.imwrite("test.jpg", image)
        to_pil = transforms.ToPILImage()
        pil_image = to_pil(image)
        # ``predict`` is a method; the bare call used before raised NameError.
        index = self.predict(pil_image)
        print(index)

    def main_prediction(self, image_np):
        """Clean an in-memory BGR image and return the predicted class index.

        Args:
            image_np: BGR image as a NumPy array.

        Returns:
            The index returned by ``predict`` for the cleaned image.
        """
        image_np = self.remove_noise_and_smooth_numpy(image_np)
        to_pil = transforms.ToPILImage()
        pil_image = to_pil(image_np)
        return self.predict(pil_image)