def __init__(self, cnn_model, input_dim, hidden_dim, lstm_layers,
             embedding_dim, batch_size, sequence_len):
    super(Encoder, self).__init__()
    # Convolutions (pre-trained)
    self.cnn_embedding_dim = None
    if cnn_model == "vgg":
        self.cnn = models.vgg16(pretrained=True).features
        self.cnn_embedding_dim = 25088
    elif cnn_model == "resnet":
        # torchvision has no models.resnet(); use a concrete depth and drop
        # the classifier head (ResNet also has no .features attribute).
        resnet = models.resnet50(pretrained=True)
        self.cnn = nn.Sequential(*list(resnet.children())[:-1])
        self.cnn_embedding_dim = resnet.fc.in_features  # 2048 for ResNet-50
    self.fc1 = nn.Linear(self.cnn_embedding_dim, input_dim)
    self.fc2 = nn.Linear(hidden_dim, embedding_dim)
    # Activations
    self.relu = nn.ReLU()
    self.leaky_relu = nn.LeakyReLU(0.2)
    # LSTM
    self.LSTM = nn.LSTM(
        input_size=input_dim,
        hidden_size=hidden_dim,
        num_layers=lstm_layers,
        batch_first=True  # input & output have batch size as the 1st dimension, e.g. (batch, time_step, input_size)
    )
    self.sequence_len = sequence_len
    self.batch_size = batch_size
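# Hedged forward sketch for the CNN+LSTM encoder above: embed each frame with
# the frozen CNN, project with fc1, run the sequence through the LSTM, and map
# the last hidden state to the embedding. The author's actual forward is not
# shown, so this is an assumption about the intended data flow.
def forward(self, frames):  # frames: (batch, sequence_len, 3, H, W)
    embeddings = []
    for t in range(self.sequence_len):
        feat = self.cnn(frames[:, t]).flatten(1)      # (batch, cnn_embedding_dim)
        embeddings.append(self.relu(self.fc1(feat)))  # (batch, input_dim)
    seq = torch.stack(embeddings, dim=1)              # (batch, sequence_len, input_dim)
    out, _ = self.LSTM(seq)                           # (batch, sequence_len, hidden_dim)
    return self.fc2(out[:, -1])                       # (batch, embedding_dim)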
def create_model(arch, hidden_units, learning_rate):
    if arch == 'vgg16':
        model = models.vgg16(pretrained=True)
        in_features = model.classifier[0].in_features
        for param in model.parameters():
            param.requires_grad = False
    elif arch == 'resnet':
        # torchvision has no models.resnet(); pick a concrete depth.
        model = models.resnet50(pretrained=True)
        in_features = model.fc.in_features  # an attribute, not a callable
        for param in model.parameters():
            param.requires_grad = False
    elif arch == 'densenet121':
        model = models.densenet121(pretrained=True)
        in_features = model.classifier.in_features
        for param in model.parameters():
            param.requires_grad = False
    else:
        raise ValueError('The requested architecture cannot be recognised')

    classifier = nn.Sequential(
        OrderedDict([('fc1', nn.Linear(in_features, hidden_units)),
                     ('drop', nn.Dropout(p=0.5)),
                     ('relu', nn.ReLU()),
                     ('fc2', nn.Linear(hidden_units, 102)),
                     ('output', nn.LogSoftmax(dim=1))]))
    if arch == 'resnet':
        # ResNet keeps its head at model.fc, not model.classifier; assigning
        # model.classifier would leave the new head unused in forward().
        model.fc = classifier
        optimizer = optim.Adam(model.fc.parameters(), lr=learning_rate)
    else:
        model.classifier = classifier
        optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)

    criterion = nn.NLLLoss()
    scheduler = lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.1, last_epoch=-1)
    return model, criterion, optimizer, scheduler, in_features
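# Hedged usage sketch for create_model (assumes the names used above are in
# scope: torchvision.models as models, torch.nn as nn, torch.optim as optim,
# torch.optim.lr_scheduler as lr_scheduler, and OrderedDict from collections;
# the hyperparameter values are illustrative).
model, criterion, optimizer, scheduler, in_features = create_model(
    arch='vgg16', hidden_units=512, learning_rate=0.001)
print(in_features)  # 25088 for VGG-16's first classifier layer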
def __init__(self, embed_size):
    super(CNN_Encoder, self).__init__()
    # Load the pretrained ResNet-152 model and replace the top fc layer
    resnet = models.resnet152(pretrained=True)
    modules = list(resnet.children())[:-1]  # drop the final fc layer
    self.resnet = nn.Sequential(*modules)
    self.linear = nn.Linear(resnet.fc.in_features, embed_size)
    self.bn = nn.BatchNorm1d(embed_size, momentum=0.01)  # 1d: the linear output is a flat vector per sample
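# A minimal forward pass to pair with the encoder above; this mirrors the
# common image-captioning pattern (run the frozen CNN, flatten, project to
# embed_size, then batch-norm) but is a sketch, not the author's confirmed
# method.
def forward(self, images):
    with torch.no_grad():                      # keep the pretrained CNN frozen
        features = self.resnet(images)         # (batch, 2048, 1, 1)
    features = features.reshape(features.size(0), -1)
    features = self.bn(self.linear(features))  # (batch, embed_size)
    return features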
def test_resnet_parsing(self):
    # The original wrapped this body in `if __name__ == '__main__':`, which
    # makes the test a no-op under a test runner; `resnet` is assumed imported,
    # e.g. `from torchvision.models import resnet50 as resnet`.
    resnet50 = resnet()
    resnet_in = torch.rand((1, 3, 512, 512))
    bonsai_parsed_model = bonsai_parser(resnet50, resnet_in)
    bonsai_parsed_model.summary()  # prints model cfg summary
    bonsai_parsed_model.save_cfg('example_models/configs/resnet50_from_pytorch.cfg')
def __init__(self, img_path, plot_res=True, to_onnx=False):
    self.device = "cuda:0"
    # `resnet`, `num` (the ResNet depth), and `model_folder` are assumed to be
    # defined at module level in the original source.
    self.model = resnet()
    self.model_name = "resnet{}.pth".format(num)
    self.model.load_state_dict(
        torch.load(os.path.join(model_folder, "pth/{}".format(self.model_name))))
    self.model.cuda()
    self.to_onnx = to_onnx
    self.input_tensor = input_dim_3to4(image_normalize(img_path))
    self.plot = plot_res
    self.img = cv2.imread(img_path)
def load_build(filepath):
    checkpoint = torch.load(filepath)
    # `in_arg` is assumed to be a module-level argparse namespace.
    if in_arg.arch == 'vgg16':
        model = models.vgg16(pretrained=True)
    else:
        # torchvision has no models.resnet(); pick a concrete depth.
        model = models.resnet50(pretrained=True)
    model.classifier = checkpoint['classifier']
    learning_rate = checkpoint['learning_rate']
    batch_size = checkpoint['batch_size']
    model.load_state_dict(checkpoint['state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']
    return model
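# Hedged usage sketch: rebuild a model from a saved checkpoint and switch to
# eval mode for inference. The checkpoint path is illustrative.
model = load_build('checkpoint.pth')
model.eval()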
def __init__(self, class_num=62):  # was misspelled __int__, so it never ran as a constructor
    super(Res18, self).__init__()
    # ResNet-18 layer configuration; `resnet` is assumed to be torchvision's
    # ResNet class (torchvision.models.resnet.ResNet).
    model_ft = resnet(BasicBlock, [2, 2, 2, 2])
    self.base_model = nn.Sequential(*list(model_ft.children())[:-3])
    # attention
    self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.maxpool = nn.AdaptiveMaxPool2d(1)  # was AdaptiveAvgPool2d, which duplicated the average pool
    self.sign = nn.Sigmoid()
    in_planes = 256
    ratio = 8
    self.a_fc1 = nn.Conv2d(in_planes, in_planes // ratio, 1, bias=False)
    self.a_relu = nn.ReLU()
    self.a_fc2 = nn.Conv2d(in_planes // ratio, in_planes, 1, bias=False)
    self.avg_pool = nn.AdaptiveAvgPool2d(1)
    self.max_pool = nn.AdaptiveMaxPool2d(1)  # was AdaptiveAvgPool2d
    self.reduce_layer = nn.Conv2d(512, 256, 1)
    self.fc1 = nn.Sequential(nn.Dropout(0.5), nn.Linear(256, class_num))
    self.fc2 = nn.Sequential(nn.Dropout(0.5), nn.Linear(256, class_num))
    self.fc3 = nn.Sequential(nn.Dropout(0.5), nn.Linear(256, class_num))
    self.fc4 = nn.Sequential(nn.Dropout(0.5), nn.Linear(256, class_num))
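# The paired avg/max pools plus the a_fc1 -> ReLU -> a_fc2 bottleneck and the
# sigmoid suggest CBAM-style channel attention. A sketch of how these pieces
# are typically combined (an assumption, not the author's confirmed forward):
def channel_attention(self, x):  # x: (batch, 256, H, W) from base_model
    avg_out = self.a_fc2(self.a_relu(self.a_fc1(self.avg_pool(x))))
    max_out = self.a_fc2(self.a_relu(self.a_fc1(self.max_pool(x))))
    scale = self.sign(avg_out + max_out)  # (batch, 256, 1, 1) gate in [0, 1]
    return x * scale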
def __init__(self, cnn_model, input_dim, hidden_dim, lstm_layers,
             embedding_dim, sequence_len):
    super(Encoder, self).__init__()
    # Convolutions (pre-trained)
    self.cnn_embedding_dim = None
    if cnn_model == "vgg":
        self.cnn = models.vgg16(pretrained=True).features
        self.cnn_embedding_dim = 25088
    elif cnn_model == "resnet":
        # torchvision has no models.resnet(); use a concrete depth and drop
        # the classifier head (ResNet also has no .features attribute).
        resnet = models.resnet50(pretrained=True)
        self.cnn = nn.Sequential(*list(resnet.children())[:-1])
        self.cnn_embedding_dim = resnet.fc.in_features  # 2048 for ResNet-50
    self.fc1 = nn.Linear(self.cnn_embedding_dim, input_dim)
    self.fc2 = nn.Linear(hidden_dim, embedding_dim)
    self.unpool_1 = nn.Upsample(scale_factor=5, mode='bilinear')
    self.deconv_1 = nn.ConvTranspose2d(in_channels=8, out_channels=3,
                                       kernel_size=200, stride=1)
    # Activations
    self.relu = nn.ReLU()
    self.leaky_relu = nn.LeakyReLU(0.2)
    # LSTM
    self.LSTM = nn.LSTM(
        input_size=input_dim,
        hidden_size=hidden_dim,
        num_layers=lstm_layers,
        batch_first=True  # input & output have batch size as the 1st dimension, e.g. (batch, time_step, input_size)
    )
    self.sequence_len = sequence_len
def __init__(self, num_classes=4):
    super().__init__()
    self.pre = ClassicCNN.classic_cnn_block(1, 3)
    # `resnet` is assumed to be a torchvision constructor such as resnet18.
    self.resnet = resnet(pretrained=True)
    self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)
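# Hedged forward sketch: the pre block presumably maps 1-channel input to the
# 3 channels the pretrained ResNet expects, then the ResNet classifies.
def forward(self, x):
    x = self.pre(x)        # (batch, 1, H, W) -> (batch, 3, H, W)
    return self.resnet(x)  # (batch, num_classes)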
def main():
    # load a pre-trained network (VGG16 or ResNet)
    if in_arg.arch == 'vgg16':
        model = models.vgg16(pretrained=True)
        num_ftrs = model.classifier[0].in_features
    else:
        # torchvision has no models.resnet(); pick a concrete depth. Note that
        # ResNet keeps its head at model.fc, not model.classifier.
        model = models.resnet50(pretrained=True)
        num_ftrs = model.fc.in_features
    print(num_ftrs)  # used as the first layer of the classifier

    # Build the model (pretrained model whose classifier is replaced with the custom classifier)
    input_size = num_ftrs
    output_size = 102  # this size would differ for another image dataset, but we assume Flowers
    classifier = Network(input_size, output_size, hidden_layers)
    model.classifier = classifier  # replace the pretrained model's classifier with the custom one
    print(model)  # print the model to make sure it is correct

    # Freeze model parameters so we don't backpropagate through them
    for param in model.parameters():
        param.requires_grad = False
    for paramc in model.classifier.parameters():
        paramc.requires_grad = True

    # load the dataset from utilities.py
    train_dataset = load_data('train')
    trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=102, shuffle=True)
    test_dataset = load_data('test')
    testloader = torch.utils.data.DataLoader(test_dataset, batch_size=102, shuffle=True)
    valid_dataset = load_data('valid')
    validloader = torch.utils.data.DataLoader(valid_dataset, batch_size=102, shuffle=True)

    # image_classes is a list of strings such as '17', of length 102 since there are 102 class numbers
    image_classes = train_dataset.classes
    num_classes = len(image_classes)

    # set up parameters for the forward pass through the network
    print_every = 2
    steps = 0
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), in_arg.lr)

    # FORWARD PROPAGATION
    # send images and model to CPU or GPU depending on GPU availability and the input argument
    cuda = torch.cuda.is_available()
    if in_arg.gpu == 'gpu' and cuda:
        device = torch.device('cuda')
        model.cuda()
        FloatTensor = torch.cuda.FloatTensor
    else:
        device = torch.device('cpu')
        model.cpu()
        FloatTensor = torch.FloatTensor

    for e in range(in_arg.epoch):
        running_loss = 0
        model.train()  # make sure the model is in train mode
        for images, labels in trainloader:
            steps += 1
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()             # clear the gradients
            output = model(images)            # forward pass for training
            loss = criterion(output, labels)  # compute the loss
            loss.backward()                   # backward pass
            optimizer.step()                  # optimize
            running_loss += loss.item()

            # during training, run test images through the model and get accuracy
            if steps % print_every == 0:
                with torch.no_grad():  # turn off gradients for validation
                    test_loss, accuracy = testing_pass(model, testloader, criterion)
                print('epoch: ', e,
                      'Training Loss: {:.4f}'.format(running_loss / print_every),
                      'Testing Loss: {:.4f}'.format(test_loss / len(testloader)),
                      'Testing Accuracy: {:.4f}'.format(accuracy / len(testloader)))
                running_loss = 0
                model.train()  # turn training mode back on for the next forward train pass

    # Save the checkpoint
    model.class_to_idx = train_dataset.class_to_idx
    checkpoint = {
        'input_size': input_size,
        'output_size': 102,
        'learning_rate': in_arg.lr,
        'batch_size': 102,
        'classifier': classifier,
        'epochs': in_arg.epoch,
        'classifier_state_dict': model.classifier.state_dict(),
        'class_to_idx': model.class_to_idx
    }
    torch.save(checkpoint, 'checkpoint.pth')
from os import environ
from sys import argv
from time import time

import torch
from torchvision.models import resnet50 as resnet

# torch.backends.cudnn.benchmark = True
environ["CUDA_LAUNCH_BLOCKING"] = "1"

minibatch_size = 1
if len(argv) > 1:
    minibatch_size = int(argv[1])

# if True:
with torch.no_grad():
    x = torch.zeros(minibatch_size, 3, 224, 224)
    print("input shape:", x.shape)

    t0 = time()
    model = resnet(pretrained=False).eval()
    t1 = time()
    print("instantiation time:", (t1 - t0) * 1000, "ms")
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("parameters:", num_params)
    input()

    t0 = time()
    model = model.cuda()
    x = x.cuda()
    model(x)
    t1 = time()
    print("1st inference time:", (t1 - t0) * 1000, "ms")
    input()

    times = []
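    # The snippet ends just before the timing loop; a minimal continuation
    # under the same setup might look like this (the iteration count is
    # illustrative). cuda.synchronize() keeps the timings honest, since kernel
    # launches are asynchronous.
    for _ in range(100):
        torch.cuda.synchronize()
        t0 = time()
        model(x)
        torch.cuda.synchronize()
        t1 = time()
        times.append((t1 - t0) * 1000)
    print("mean inference time:", sum(times) / len(times), "ms")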
def __init__(self, output, resnet_depth=34, imChannels=1, alphabet_length=11,
             labels="_1234567890 ", last_epoch=0):
    super(Model, self).__init__()
    self.out_path = output
    self.last_epoch = last_epoch

    # save model parameters for the checkpointer
    self.modelparams = {"output": output,
                        "resnet_depth": resnet_depth,
                        "imChannels": imChannels,
                        "alphabet_length": alphabet_length,
                        "labels": labels}

    # ----- MODEL ------
    # same as the resnet input layer, but with support for grayscale input
    self.input = nn.Conv2d(imChannels, 64, kernel_size=(7, 7), stride=(2, 2),
                           padding=(3, 3), bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.bn2 = nn.BatchNorm2d(alphabet_length)
    # leave the first maxpool out to get more output dimensions, since the
    # output string's maximum length is limited by this
    self.relu = nn.ReLU(inplace=True)

    if resnet_depth == 18:
        from torchvision.models import resnet18 as resnet
        resnet_output_size = 256
    elif resnet_depth == 34:
        from torchvision.models import resnet34 as resnet
        resnet_output_size = 256
    elif resnet_depth == 50:
        from torchvision.models import resnet50 as resnet
        resnet_output_size = 1024
    else:
        raise NotImplementedError("Only resnet depths of 18, 34, and 50 are supported.")

    # some layers are also removed from the end, since small images result in
    # negative filter sizes on those layers, and training crashes
    resnet_orig = resnet()
    resnet_no_top = nn.Sequential(*list(resnet_orig.children())[4:-3])
    self.resnet = resnet_no_top
    self.dropout = nn.Dropout2d()

    conv1 = nn.Conv2d(resnet_output_size, int(resnet_output_size / 2), 2)
    conv2 = nn.Conv2d(int(resnet_output_size / 2), int(resnet_output_size / 4), 2)
    conv3 = nn.Conv2d(int(resnet_output_size / 4), alphabet_length, 1)

    # svhn and coco-text datasets are supported; you might have to change
    # these layers if using other datasets
    if alphabet_length == 11:  # svhn parameters
        self.conv_out = nn.Sequential(conv1, self.relu, self.dropout,
                                      conv2, self.relu, self.dropout,
                                      conv3, self.bn2, self.relu)
    else:  # coco-text parameters
        conv3 = nn.Conv2d(int(resnet_output_size), alphabet_length, 1)
        self.conv_out = nn.Sequential(self.dropout, conv3, self.bn2, self.relu)

    n_param = self.count_parameters()
    print("\nModel initialized, {} trainable parameters".format(n_param))

    # ------ UTILITIES ------
    self.labels = labels
    self.decoder = GreedyDecoder(self.labels)
    self.loss_fcn = CTCLoss()

    # ------ INIT DEVICES -------
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    if self.device.type == "cpu":  # a torch.device never compares equal to a bare string
        print("GPU not found, using CPU ...")
    else:
        if torch.cuda.device_count() > 1:
            print("Using {} cuda devices ...".format(torch.cuda.device_count()))
            self.input = nn.DataParallel(self.input)  # was wrapping self.conv_out by mistake
            self.resnet = nn.DataParallel(self.resnet)
            self.relu = nn.DataParallel(self.relu)
            self.dropout = nn.DataParallel(self.dropout)
            self.conv_out = nn.DataParallel(self.conv_out)
        else:
            print("using {} ...".format(self.device))
if not nsml.IS_ON_NSML:
    DATASET_PATH = os.path.join('/home/kwpark_mk2/airush2_temp')
    DATASET_NAME = 'airush2_temp'
    print('use local gpu...!')
    use_nsml = False
else:
    DATASET_PATH = os.path.join(nsml.DATASET_PATH)
    print('start using nsml...!')
    print('DATASET_PATH: ', DATASET_PATH)
    use_nsml = True

from torch.autograd import Variable

# torchvision has no models.resnet(); the variable name suggests ResNet-50.
resnet50 = models.resnet50(pretrained=True)
modules = list(resnet50.children())[:-1]  # drop the final fc layer
resnet50 = nn.Sequential(*modules)
for p in resnet50.parameters():
    p.requires_grad = False

import glob


def main(args):
    model = resnet50
    model = model.cuda()
    # note: every parameter above was frozen, so this optimizer has nothing to
    # update unless a trainable head is attached elsewhere
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
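# Hedged usage sketch: with the head removed and the weights frozen, resnet50
# acts as a fixed feature extractor; each image maps to a 2048-d vector. The
# dummy batch is illustrative.
dummy = torch.zeros(4, 3, 224, 224)
features = resnet50(dummy).view(4, -1)  # (4, 2048)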
def __init__(self, cuda):
    super(res_net, self).__init__()
    self.cuda = cuda  # note: this shadows nn.Module.cuda()
    # `resnet` is assumed to be a torchvision constructor such as resnet18 or
    # resnet34, whose final stage yields the 512 channels the decoder expects.
    self.net = resnet(pretrained=True)
    # Delete the avg pooling and fc layer
    self.net = nn.Sequential(*list(self.net.children())[:-2])
    # self.net.requires_grad = False
    print(self.net)

    # Decoder
    # Block 5
    self.decoder_block0 = nn.Sequential(*[
        nn.ConvTranspose2d(512, 64, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64, 64, kernel_size=5, stride=2, padding=2,
                           output_padding=1),  # upsampling
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64, 512, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(512)
    ])
    # Block 6
    self.decoder_block1 = nn.Sequential(*[
        nn.ConvTranspose2d(512, 64, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64, 64, kernel_size=5, stride=2, padding=2,
                           output_padding=1),  # upsampling
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64, 256, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(256),
    ])
    # Block 7
    self.decoder_block2 = nn.Sequential(*[
        nn.ConvTranspose2d(256, 64, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64, 64, kernel_size=5, stride=2, padding=2,
                           output_padding=1),  # upsampling
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64, 256, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(256),
    ])
    # Block 8
    self.decoder_block3 = nn.Sequential(*[
        nn.ConvTranspose2d(256, 64, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64, 64, kernel_size=5, stride=2, padding=2,
                           output_padding=1),  # upsampling
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64, 128, kernel_size=1, stride=1, padding=0),
        nn.BatchNorm2d(128),
    ])
    # Block 9
    self.decoder_block4 = nn.Sequential(*[
        nn.ConvTranspose2d(128, 64, kernel_size=5, stride=2, padding=2,
                           output_padding=1),  # upsampling
        nn.BatchNorm2d(64),
        nn.ConvTranspose2d(64, 1, kernel_size=1, stride=1),
    ])
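# Hedged forward sketch: one plausible wiring of the encoder and decoder,
# running the blocks sequentially. The channel counts chain correctly
# (512 -> 512 -> 256 -> 256 -> 128 -> 1), but the author's actual forward,
# e.g. any skip connections, is not shown in the snippet.
def forward(self, x):
    x = self.net(x)             # (batch, 512, H/32, W/32) encoder features
    x = self.decoder_block0(x)  # upsample x2, 512 channels out
    x = self.decoder_block1(x)  # upsample x2, 256 channels out
    x = self.decoder_block2(x)  # upsample x2, 256 channels out
    x = self.decoder_block3(x)  # upsample x2, 128 channels out
    x = self.decoder_block4(x)  # upsample x2, 1-channel output map
    return x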