def test_model(sess):
    input_placeholder = tf.placeholder(dtype=tf.float32,
                                       shape=[1, 440, 440, 3],
                                       name='input_images')
    gen = SpriteGenerator(input_placeholder, 'SpriteGenerator')
    # keep the two VGG towers in separate variables (the original overwrote
    # the first assignment)
    vggTrain = VGG16(gen.output, 'train_vgg')
    vggRef = VGG16(input_placeholder, 'train_ref')
def get_model(model_name):
    if model_name == 'CNN':
        return CNN()
    if model_name == 'CNN_GAP':
        return CNN(GAP=True)
    if model_name == 'VGG16':
        return VGG16(batch_norm=False)
    if model_name == 'VGG11_BN':
        return VGG11(batch_norm=True)
    if model_name == 'VGG13_BN':
        return VGG13(batch_norm=True)
    if model_name == 'VGG16_BN':
        return VGG16(batch_norm=True)
    if model_name == 'VGG11_GAP':
        return VGG11(batch_norm=True, GAP=True)
    if model_name == 'VGG13_GAP':
        return VGG13(batch_norm=True, GAP=True)
    if model_name == 'VGG16_GAP':
        return VGG16(batch_norm=True, GAP=True)
    if model_name == 'ResNet18':
        return ResNet18()
    if model_name == 'ResNet34':
        return ResNet34()
    if model_name == 'ResNet50':
        return ResNet50()
    if model_name == 'ResNet101':
        return ResNet101()
    raise NotImplementedError('Model has not been implemented.')
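# Usage sketch for the factory above (not in the original snippet); the keys
# must match the strings checked inside get_model.
model = get_model('VGG16_BN')       # VGG16 with batch normalization
gap_model = get_model('VGG16_GAP')  # batch norm + global average pooling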
def __init__(self, n_classes):
    super(SSD300, self).__init__()
    self.n_classes = n_classes
    self.Base = VGG16()
    self.Extra = nn.Sequential(OrderedDict([
        ('extra1_1', nn.Conv2d(1024, 256, 1)),
        ('extra1_2', nn.Conv2d(256, 512, 3, padding=1, stride=2)),
        ('extra2_1', nn.Conv2d(512, 128, 1)),
        ('extra2_2', nn.Conv2d(128, 256, 3, padding=1, stride=2)),
        ('extra3_1', nn.Conv2d(256, 128, 1)),
        ('extra3_2', nn.Conv2d(128, 256, 3)),
        ('extra4_1', nn.Conv2d(256, 128, 1)),
        ('extra4_2', nn.Conv2d(128, 256, 3)),
    ]))
    self.pred_layers = ['conv4_3', 'conv7', 'extra1_2', 'extra2_2',
                        'extra3_2', 'extra4_2']
    n_channels = [512, 1024, 512, 256, 256, 256]
    # decrease the prediction layers' influence on the backbone
    self.L2Norm = nn.ModuleList([L2Norm(512, 20)])
    self.norm_layers = ['conv4_3']
    self.Loc = nn.ModuleList([])
    self.Conf = nn.ModuleList([])
    for i, ar in enumerate(self.config['aspect_ratios']):
        n = len(ar) + 1
        self.Loc.append(nn.Conv2d(n_channels[i], n * 4, 3, padding=1))
        self.Conf.append(nn.Conv2d(n_channels[i], n * (self.n_classes + 1),
                                   3, padding=1))
    self.relu = nn.ReLU()
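# A hedged sketch (not part of the original class) of how the per-layer
# Loc/Conf outputs above are typically flattened into (N, n_priors, 4) and
# (N, n_priors, n_classes + 1) tensors; `feature_maps` stands for the outputs
# of the layers named in self.pred_layers.
import torch

def flatten_predictions(feature_maps, loc_layers, conf_layers, n_classes):
    locs, confs = [], []
    for x, loc, conf in zip(feature_maps, loc_layers, conf_layers):
        n = x.size(0)
        # (N, boxes*4, H, W) -> (N, H*W*boxes, 4)
        locs.append(loc(x).permute(0, 2, 3, 1).contiguous().view(n, -1, 4))
        # (N, boxes*(C+1), H, W) -> (N, H*W*boxes, C+1)
        confs.append(conf(x).permute(0, 2, 3, 1).contiguous()
                     .view(n, -1, n_classes + 1))
    return torch.cat(locs, 1), torch.cat(confs, 1)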
def main(rank, world_size):
    setup(rank, world_size)
    device_cnt = torch.cuda.device_count()
    #print("use %d gpus" % (device_cnt))
    n = device_cnt // world_size
    device_ids = list(range(rank * n, (rank + 1) * n))
    model = VGG16().to(device_ids[0])
    ddp_model = torch.nn.parallel.DistributedDataParallel(
        model, device_ids=device_ids)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=0.01)
    inputs = torch.randn(64 * n, 3, 224, 224).to(device_ids[0])
    # CrossEntropyLoss expects integer class indices, not random floats
    labels = torch.randint(0, 1000, (64 * n,)).to(device_ids[0])
    for epoch in range(0, 2):
        # zero the gradients each iteration, not once before the loop
        optimizer.zero_grad()
        outputs = ddp_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
    cleanup()
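# Minimal sketches of the setup/cleanup helpers this script assumes, following
# the standard torch.distributed recipe; the address/port values are
# placeholders.
import os
import torch.distributed as dist

def setup(rank, world_size):
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    # nccl is the usual backend for multi-GPU training
    dist.init_process_group('nccl', rank=rank, world_size=world_size)

def cleanup():
    dist.destroy_process_group()

# typically launched with:
#   torch.multiprocessing.spawn(main, args=(world_size,), nprocs=world_size)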
def train(params, report_fn=None, start_new=False):
    print('Evaluating Target Style...')
    style_grams = eval_style(params)
    tf.reset_default_graph()
    sess = tf.InteractiveSession()

    print('Defining Input Pipeline...')
    input_images = create_tf_pipeline(params)

    print('Building Model...')
    input_shape = [params.batch_size] + params.input_shape
    input_images.set_shape(input_shape)
    gen = SpriteGenerator(input_images, 'SpriteGenerator')
    vggTrain = VGG16(gen.output, 'train_vgg')
    vggRef = VGG16(input_images, 'ref_vgg')

    print('Defining Losses...')
    J, train_step, J_content, J_style, global_step = total_loss(
        input_images, gen, vggTrain, vggRef, style_grams, params)
    with tf.name_scope('summaries'):
        tf.summary.scalar('loss', J)
        tf.summary.scalar('style_loss', J_style)
        tf.summary.scalar('content_loss', J_content)

    if start_new:
        print('Starting...')
        sess.run(tf.global_variables_initializer())
    else:
        print('Continuing...')

    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=params.save_path,
            log_step_count_steps=params.log_step,
            save_summaries_steps=params.summary_step) as sess:
        while not sess.should_stop():
            _, total_cost, content_cost, style_cost = sess.run(
                [train_step, J, J_content, J_style])
            if report_fn is not None:
                step = tf.train.global_step(sess, global_step)
                report_fn(params, step, total_cost, content_cost, style_cost)
    print('Done...')
def __init__(self, n_classes=1):
    super().__init__()
    self.n_classes = n_classes
    self.rolling_times = 4
    self.rolling_ratio = 0.075
    self.Base = VGG16()
    self.Extra = nn.Sequential(OrderedDict([
        ('extra1_1', nn.Conv2d(1024, 256, 1)),
        ('extra1_2', nn.Conv2d(256, 256, 3, padding=1, stride=2)),
        ('extra2_1', nn.Conv2d(256, 128, 1)),
        ('extra2_2', nn.Conv2d(128, 256, 3, padding=1, stride=2)),
        ('extra3_1', nn.Conv2d(256, 128, 1)),
        ('extra3_2', nn.Conv2d(128, 256, 3, padding=1, stride=2))]))
    self.pred_layers = ['conv4_3', 'conv7', 'extra1_2', 'extra2_2',
                        'extra3_2']
    self.L2Norm = nn.ModuleList([L2Norm(512, 20)])
    self.l2norm_layers = ['conv4_3']
    # intermediate layers (the original was missing the commas between
    # the first two entries)
    self.Inter = nn.ModuleList([
        nn.Sequential(nn.Conv2d(512, 256, 3, padding=1),
                      nn.ReLU(inplace=True)),
        nn.Sequential(nn.Conv2d(1024, 256, 3, padding=1),
                      nn.ReLU(inplace=True)),
        nn.Sequential(),
        nn.Sequential(),
        nn.Sequential()])
    n_channels = [256, 256, 256, 256, 256]
    # Recurrent Rolling
    self.RollLeft = nn.ModuleList([])
    self.RollRight = nn.ModuleList([])
    self.Roll = nn.ModuleList([])
    for i in range(len(n_channels)):
        n_out = int(n_channels[i] * self.rolling_ratio)
        if i > 0:
            self.RollLeft.append(
                nn.Sequential(
                    nn.Conv2d(n_channels[i - 1], n_out, 1),
                    nn.ReLU(inplace=True),
                    nn.MaxPool2d(2, ceil_mode=True)))
        if i < len(n_channels) - 1:
            self.RollRight.append(
                nn.Sequential(
                    nn.Conv2d(n_channels[i + 1], n_out, 1),
                    nn.ReLU(inplace=True),  # was nn.Relu, which does not exist
                    nn.ConvTranspose2d(n_out, n_out, kernel_size=4,
                                       stride=2, padding=1)))
        n_out = n_out * (int(i > 0) + int(i < len(n_channels) - 1))
        self.Roll.append(nn.Sequential(
            nn.Conv2d(n_channels[i] + n_out, n_channels[i], 1),
            nn.ReLU(inplace=True)))
    # Prediction
    self.Loc = nn.ModuleList([])
    self.Conf = nn.ModuleList([])
    for i in range(len(n_channels)):
        n_boxes = len(self.config['aspect_ratios'][i]) + 1
        self.Loc.append(nn.Conv2d(n_channels[i], n_boxes * 4, 3, padding=1))
        self.Conf.append(nn.Conv2d(n_channels[i],
                                   n_boxes * (self.n_classes + 1),
                                   3, padding=1))
def test_loss(sess):
    params = TrainingParams()
    style_grams = eval_style(params)
    tf.reset_default_graph()
    input_shape = [2, 256, 256, 3]
    input_placeholder = tf.placeholder(dtype=tf.float32, shape=input_shape,
                                       name='input_images')
    gen = SpriteGenerator(input_placeholder, 'SpriteGenerator')
    vggTrain = VGG16(gen.output, 'train_vgg')
    vggRef = VGG16(input_placeholder, 'train_ref')
    J = total_loss(sess, input_placeholder, gen, vggTrain, vggRef,
                   style_grams, params)
def __init__(self, sess, learning_rate=1e-4, stddev=0.01, n_bins=1,
             overlap=30., orientation_loss_weight=8., dim_loss_weight=4.,
             weights_path='/', mode='train'):
    '''
    Input:
        learning_rate : learning rate
        stddev : standard deviation for weight initialization
        n_bins : number of bins for orientation prediction
        overlap : overlap of the bins in degrees for orientation prediction
        orientation_loss_weight : weight of the orientation loss in the total loss
        dim_loss_weight : weight of the dimension loss in the total loss
        weights_path : path to the pretrained VGG weights
    '''
    self.sess = sess

    # set inputs and outputs
    self.inputs = tf.placeholder(tf.float32, shape=[None, 224, 224, 3],
                                 name='input')
    self.sin_placeholder = tf.placeholder(tf.float32, shape=[None, 1],
                                          name='output_sin')
    self.cos_placeholder = tf.placeholder(tf.float32, shape=[None, 1],
                                          name='output_cos')
    self.dim_placeholder = tf.placeholder(tf.float32, shape=[None, 3],
                                          name='output_dim')

    # set hyperparameters
    self.set_hparams(learning_rate, stddev, n_bins, overlap,
                     orientation_loss_weight, dim_loss_weight)

    # set up the VGG network with just the convolution layers
    self.vgg = VGG16(self.sess, self.inputs, only_convolution=True)
    self.convolution_codes = self.vgg.convolution_codes

    # initialize the new fully connected module - 3D module
    self.fully_connected_layer()
    self.set_losses()

    # load weights for the convolution-only network
    # (the VGG class automatically loads only the convolution weights
    # when only_convolution=True)
    tf.global_variables_initializer().run()
    if mode == 'train':
        self.vgg.load_weights(weights_path)
    self.saver = tf.train.Saver()
    self.iteration = 0
def __init__(self, args):
    super(Model, self).__init__()
    print("using backbone", args.backbone)
    if args.backbone == "vgg16_torch":
        self.feature_extractor = CNN()
    elif args.backbone == "vgg16_longcw":
        self.feature_extractor = VGG16()
        self.feature_extractor.load_from_npy_file(
            '../input/pretrained_model/VGG_imagenet.npy')
    self.rpn = RPN()
    self.fasterrcnn = FasterRcnn()
    self.proplayer = ProposalLayer(args=args)
    self.roipool = ROIpooling()
def main(args):
    cuda = torch.cuda.is_available()

    # dataset
    train_loader, test_loader = return_data(args)

    # model
    model = FCN32s(n_class=2)
    start_epoch = 0
    start_iteration = 0
    if args.resume:
        checkpoint = torch.load(args.name)
        model.load_state_dict(checkpoint)
    else:
        vgg16 = VGG16(pretrained=True)
        model.copy_params_from_vgg16(vgg16)
    if cuda:
        model = model.cuda()

    # optimizer
    if args.optimizer == 'SGD':
        optim = torch.optim.SGD(
            get_parameters(model, bias=False),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optim = torch.optim.Adam(
            get_parameters(model, bias=False),
            lr=args.lr,
            betas=(0.9, 0.999))

    # train
    trainer = Trainer(
        cuda=cuda,
        model=model,
        optimizer=optim,
        train_loader=train_loader,
        val_loader=test_loader,
        epochs=200,
        size_average=False,
        name=args.name,
        loss=args.loss)
    trainer.epoch = start_epoch
    trainer.iteration = start_iteration
    if args.mode == 'train':
        trainer.train()
    elif args.mode == 'test':
        trainer.draw()
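# A hedged variant of the optimizer setup above: the script passes only
# bias=False parameters to the optimizer, so the biases would never be
# updated. The common FCN training recipe also trains biases, in a second
# parameter group with doubled LR and no weight decay. Calling
# get_parameters(model, bias=True) for the bias parameters is an assumption
# about that helper's signature.
optim = torch.optim.SGD(
    [
        {'params': get_parameters(model, bias=False)},
        {'params': get_parameters(model, bias=True),
         'lr': args.lr * 2, 'weight_decay': 0},
    ],
    lr=args.lr,
    momentum=args.momentum,
    weight_decay=args.weight_decay)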
def __init__(self, n_classes):
    super(RUN300, self).__init__()
    self.n_classes = n_classes
    self.Base = VGG16()
    self.Extra = nn.Sequential(OrderedDict([
        ('extra1_1', nn.Conv2d(1024, 256, 1)),
        ('extra1_2', nn.Conv2d(256, 512, 3, padding=1, stride=2)),
        ('extra2_1', nn.Conv2d(512, 128, 1)),
        ('extra2_2', nn.Conv2d(128, 256, 3, padding=1, stride=2)),
        ('extra3_1', nn.Conv2d(256, 128, 1)),
        ('extra3_2', nn.Conv2d(128, 256, 3)),
        ('extra4_1', nn.Conv2d(256, 128, 1)),
        ('extra4_2', nn.Conv2d(128, 256, 3))
    ]))
    self.pred_layers = ['conv4_3', 'conv7', 'extra1_2', 'extra2_2',
                        'extra3_2', 'extra4_2']
    n_channels = [512, 1024, 512, 256, 256, 256]
    self.L2Norm = nn.ModuleList([L2Norm(512, 20)])
    self.l2norm_layers = ['conv4_3']

    # Multi-Way Residual Blocks
    self.ResBlocks = nn.ModuleList()
    for i in range(len(n_channels) - 1):
        self.ResBlocks.append(
            ThreeWay(n_channels[i], n_channels[i + 1],
                     self.config['grids'][i], self.config['grids'][i + 1],
                     out_channels=256))
    self.ResBlocks.append(TwoWay(n_channels[-1], out_channels=256))

    # Unified Prediction Module
    n_boxes = len(self.config['aspect_ratios']) + 1
    #self.Loc = nn.Conv2d(256, n_boxes * 4, 3, padding=1)
    #self.Conf = nn.Conv2d(256, n_boxes * (self.n_classes+1), 3, padding=1)
    self.Loc = nn.Sequential(
        nn.Conv2d(256, 256, 1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, n_boxes * 4, 3, padding=1))
    self.Conf = nn.Sequential(
        nn.Conv2d(256, 256, 1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, n_boxes * (self.n_classes + 1), 3, padding=1))
def test_style_loss():
    params = TrainingParams()
    params.train_path = 'data/starry_night.jpg'
    crop = False
    style_grams = eval_style(params, crop=crop)
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    input_image = process_img(
        params.train_path,
        params.input_shape[0:2] if crop else None,
        crop=crop).eval()
    input_image = tf.expand_dims(input_image, 0)
    gen = SpriteGenerator(input_image, 'SpriteGenerator')
    vggTrain = VGG16(gen.output, 'train_vgg')
    J_style = style_loss(vggTrain, style_grams, 1.0)
    cost = sess.run(J_style)
    print('%f' % cost)
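# A hedged sketch of the Gram-matrix computation that eval_style/style_loss
# imply (not the project's own code); `features` is a (batch, H, W, C)
# activation tensor from one VGG layer.
import tensorflow as tf

def gram_matrix_tf(features):
    shape = tf.shape(features)
    h, w, c = shape[1], shape[2], shape[3]
    flat = tf.reshape(features, [-1, h * w, c])     # (batch, H*W, C)
    gram = tf.matmul(flat, flat, transpose_a=True)  # (batch, C, C)
    return gram / tf.cast(h * w * c, tf.float32)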
imgs1 = []  # was missing in the original, but appended to below
imgs2 = []
for i in img1:
    imgs1.append(cv2.imread(i))
for i in img2:
    imgs2.append(cv2.imread(i))
#print(imgs1[0])

from vgg import VGG16
from caffe_classes import class_names

# placeholders for the input and the dropout rate
x = tf.placeholder(tf.float32, [1, 224, 224, 3])
keep_prob = tf.placeholder(tf.float32)

# create the model with the default config
# (== no skip_layer and 1000 units in the last layer)
model = VGG16(x, keep_prob, 5749, ['fc8'])

# use the fc7 activations as the score
score = model.fc7
saver = tf.train.Saver()

# op for the final output (softmax is skipped here)
softmax = score
#softmax = tf.nn.softmax(score)

res = []
with tf.Session() as sess:
    # initialize all variables
    sess.run(tf.global_variables_initializer())
def train(args):
    print("Start Time:\t{}".format(time.ctime()))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model1 = Transformer()
    model2 = Transformer()
    state_dict1 = torch.load(args.model1)
    state_dict2 = torch.load(args.model2)
    model1.load_state_dict(state_dict1)
    model2.load_state_dict(state_dict2)
    model1.to(device)
    model2.to(device)

    vgg = VGG16().to(device)

    train_dataset = datasets.ImageFolder(
        args.datapath,
        transforms.Compose([
            transforms.Resize(args.image_size),
            transforms.CenterCrop(args.image_size),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.mul(255))
        ]))
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    transformer = Transformer(norm='instance', padding='reflect').to(device)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    loss = []
    run_time = time.strftime("%d-%H-%M-%S")
    for epoch_num in range(args.epochs):
        transformer.train()
        agg_one_loss = 0.0
        agg_two_loss = 0.0
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()

            content = x.to(device)
            y_hat = transformer(content)
            y_model1 = model1(content)
            y_model2 = model2(content)

            features_yh = vgg(normalize(y_hat))
            features_y1 = vgg(normalize(y_model1))
            features_y2 = vgg(normalize(y_model2))

            # match against each model using losses on the VGG block outputs
            # rather than raw pixels (summing tensors directly keeps the
            # autograd graph intact; wrapping them in np.array would break it)
            # one_loss = mse_loss(y_hat, y_model1)
            # two_loss = mse_loss(y_hat, y_model2)
            one_loss = sum(
                mse_loss(feat_yh, feat_y1) for feat_yh, feat_y1 in zip(
                    features_yh.values(), features_y1.values()))
            two_loss = sum(
                mse_loss(feat_yh, feat_y2) for feat_yh, feat_y2 in zip(
                    features_yh.values(), features_y2.values()))

            total_loss = one_loss + two_loss
            total_loss.backward()
            optimizer.step()

            agg_one_loss += one_loss.item()
            agg_two_loss += two_loss.item()

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "[{}/{}]\tTotal: {:.2f}\tModel 1: {:.2f}\tModel 2: {:.2f}".format(
                    count, len(train_dataset),
                    (agg_one_loss + agg_two_loss) / (batch_id + 1),
                    agg_one_loss / (batch_id + 1),
                    agg_two_loss / (batch_id + 1))
                print(mesg)
                loss.append([
                    batch_id + 1,
                    agg_one_loss / (batch_id + 1),
                    agg_two_loss / (batch_id + 1),
                    (agg_one_loss + agg_two_loss) / (batch_id + 1)
                ])

            if args.checkpoint_dir is not None and (
                    batch_id + 1) % args.checkpoint_interval == 0:
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(epoch_num + 1) + \
                    "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_dir,
                                               ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()

    save_loss_plot(np.array(loss),
                   args.log_dir + '/train_loss{}.jpg'.format(run_time))

    # save the model and a parameter log
    transformer.eval().cpu()
    if args.savename is None:
        save_model_filename = "epoch_" + str(args.epochs) + "_" + str(
            time.strftime("%d-%H-%M-%S")) + ".model"
    else:
        save_model_filename = args.savename
    save_model_path = os.path.join(args.save_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)

    # save the loss history in a pickle file
    with open('{}/loss{}'.format(args.log_dir, run_time), 'wb') as fp:
        pickle.dump(loss, fp)
    with open('{}/param_log{}.txt'.format(args.log_dir, run_time), 'w') as f:
        f.write("Epochs: {}\n".format(args.epochs))
        f.write("Batch Size: {}\n".format(args.batch_size))
        f.write("Dataset: {}\n".format(args.datapath))
        f.write("Learning Rate: {}\n".format(args.lr))
        f.write("Model 1: {}\n".format(args.model1))
        f.write("Model 2: {}\n".format(args.model2))
    print("\nDone, trained model saved at", save_model_path)
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

trainData = dsets.ImageFolder('~/data/train', transform)
testData = dsets.ImageFolder('~/data/test', transform)

trainLoader = torch.utils.data.DataLoader(dataset=trainData,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True)
testLoader = torch.utils.data.DataLoader(dataset=testData,
                                         batch_size=BATCH_SIZE,
                                         shuffle=False)

vgg16 = VGG16(n_classes=N_CLASSES)
vgg16.cuda()

# Loss, Optimizer & Scheduler
cost = tnn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg16.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

# Train the model
for epoch in range(EPOCH):
    avg_loss = 0
    cnt = 0
    if 10 == epoch:
        LEARNING_RATE = 0.01
        optimizer = torch.optim.Adam(vgg16.parameters(), lr=LEARNING_RATE)
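# Aside (not part of the original script): constructing a new Adam at epoch 10
# silently detaches the ReduceLROnPlateau scheduler created above, which still
# holds a reference to the old optimizer. A minimal self-contained sketch of
# the usual pattern, where the scheduler itself lowers the LR when a monitored
# metric plateaus:
import torch

params = [torch.nn.Parameter(torch.zeros(1))]
opt = torch.optim.Adam(params, lr=0.01)
sched = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, patience=2)
for epoch in range(10):
    val_loss = 1.0       # stand-in for a real validation loss
    sched.step(val_loss)  # reduces LR after `patience` epochs without improvement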
def main(args):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # DATA
    # Transform and DataLoader for the COCO dataset
    transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),  # scales to [0, 1]
        transforms.Lambda(lambda x: x.mul(255))
    ])
    train_dataset = datasets.ImageFolder(args.dataset, transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    # MODEL
    # Image transformation network with MSE loss and Adam optimizer
    transformer = TransformerNet().to(device)
    mse_loss = nn.MSELoss()
    optimizer = optim.Adam(transformer.parameters(), args.learning_rate)
    # Pretrained VGG
    vgg = VGG16(requires_grad=False).to(device)

    # FEATURES
    style_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Lambda(lambda x: x.mul(255))])
    # Load the style image
    style = Image.open(args.style)
    style = style_transform(style)
    style = style.repeat(args.batch_size, 1, 1, 1).to(device)
    # Compute the style features
    features_style = vgg(normalize_batch(style))
    # Loop through the VGG style layers to compute the Gram matrices
    gram_style = [gram_matrix(y) for y in features_style]

    # TRAIN
    for epoch in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.
        agg_style_loss = 0.
        for batch_id, (x, _) in tqdm(enumerate(train_loader), unit='batch'):
            x = x.to(device)
            n_batch = len(x)
            optimizer.zero_grad()

            # Pass through the image transformation network
            y = transformer(x)
            y = normalize_batch(y)
            x = normalize_batch(x)

            # Pass through the VGG layers
            features_y = vgg(y)
            features_x = vgg(x)

            # Content loss
            content_loss = args.content_weight * mse_loss(
                features_y.relu2_2, features_x.relu2_2)

            # Style loss
            style_loss = 0.
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= args.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.item()
            agg_style_loss += style_loss.item()

            # Monitor
            if (batch_id + 1) % args.log_interval == 0:
                tqdm.write('[{}] ({})\t'
                           'content: {:.6f}\t'
                           'style: {:.6f}\t'
                           'total: {:.6f}'.format(
                               epoch + 1, batch_id + 1,
                               agg_content_loss / (batch_id + 1),
                               agg_style_loss / (batch_id + 1),
                               (agg_content_loss + agg_style_loss) /
                               (batch_id + 1)))

            # Checkpoint
            if (batch_id + 1) % args.save_interval == 0:
                # eval mode
                transformer.eval().cpu()
                style_name = args.style.split('/')[-1].split('.')[0]
                checkpoint_file = os.path.join(args.checkpoint_dir,
                                               '{}.pth'.format(style_name))
                tqdm.write('Checkpoint {}'.format(checkpoint_file))
                torch.save(transformer.state_dict(), checkpoint_file)
                # back to train mode
                transformer.to(device).train()
def train(args):
    # Select GPU if available
    device = torch.device('cuda' if args.cuda else 'cpu')

    # Seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # Transforms
    transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255))
    ])

    # Datasets and DataLoaders
    train_dataset = datasets.ImageFolder(args.dataset, transform=transform)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True)

    # Networks
    transformer = TransformNet().to(device)
    vgg = VGG16(False).to(device)

    # Optimizer
    optimizer = optim.Adam(transformer.parameters(), lr=args.lr)

    # Loss function
    mse_loss = nn.MSELoss()

    # Style features
    style_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Lambda(lambda x: x.mul(255))])
    style = load_image(args.style_image, size=args.style_size)
    style = style_transform(style)
    # Repeat the style tensor across the batch dimension
    style = style.repeat(args.batch_size, 1, 1, 1).to(device)
    features_style = vgg(normalize_batch(style))
    gram_style = [gram_matrix(x) for x in features_style]

    # Training loop
    for epoch in range(args.epochs):
        transformer.train()
        total_content_loss = 0.0
        total_style_loss = 0.0
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()

            # Output from the transformer network -> y
            x = x.to(device)
            y = transformer(x)

            # Normalize batches (y -> transformer output, x -> raw input)
            y = normalize_batch(y)
            x = normalize_batch(x)

            # Output from the VGG model
            features_y = vgg(y)
            features_x = vgg(x)

            # Calculate content loss
            content_loss = args.content_weight * \
                mse_loss(features_y.relu2_2, features_x.relu2_2)

            # Calculate style loss
            style_loss = 0.0
            for ft_y, gm_s in zip(features_y, gram_style):
                gm_y = gram_matrix(ft_y)
                style_loss += mse_loss(gm_y, gm_s[:n_batch, :, :])
            style_loss *= args.style_weight

            # Calculate batch loss
            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            # Running totals
            total_content_loss += content_loss.item()
            total_style_loss += style_loss.item()

            if (batch_id + 1) % args.log_interval == 0:
                # note the parenthesized (batch_id + 1); the original divided
                # by batch_id and then added 1
                print(f'{time.ctime()}\tEpoch {epoch+1}:\t'
                      f'[{count}/{len(train_dataset)}]\t'
                      f'content: {total_content_loss / (batch_id + 1)}\t'
                      f'style: {total_style_loss / (batch_id + 1)}\t'
                      f'total: {(total_content_loss + total_style_loss) / (batch_id + 1)}')

            if (args.checkpoint_model_dir is not None
                    and (batch_id + 1) % args.checkpoint_interval == 0):
                transformer.eval().cpu()
                ckpt_model_filename = 'ckpt_epoch_' + str(epoch) + \
                    '_batch_id_' + str(batch_id + 1) + '.pth'
                ckpt_model_path = os.path.join(args.checkpoint_model_dir,
                                               ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                # return to train mode after checkpointing
                transformer.to(device).train()

    # Save model
    transformer.eval().cpu()
    save_model_filename = 'epoch_' + str(args.epochs) + '_' + str(
        time.ctime()).replace(' ', '_') + '_' + str(
        args.content_weight) + '_' + str(args.style_weight) + '.model'
    save_model_path = os.path.join(args.save_model_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)
    print('\nModel trained! It is saved at:', save_model_path)
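# Hedged sketches of the two helpers the style-transfer scripts above assume
# (gram_matrix and normalize_batch), following the widely used
# fast-neural-style recipe; the projects' own versions may differ slightly.
import torch

def gram_matrix(y):
    # batched Gram matrix of a (B, C, H, W) feature tensor, normalized by C*H*W
    b, ch, h, w = y.size()
    features = y.view(b, ch, w * h)
    features_t = features.transpose(1, 2)
    return features.bmm(features_t) / (ch * h * w)

def normalize_batch(batch):
    # rescale a [0, 255] batch to [0, 1] and apply the ImageNet mean/std
    mean = batch.new_tensor([0.485, 0.456, 0.406]).view(-1, 1, 1)
    std = batch.new_tensor([0.229, 0.224, 0.225]).view(-1, 1, 1)
    batch = batch.div(255.0)
    return (batch - mean) / std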
                       # normalize the data; inputs must already be tensors
                       transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                            std=[0.229, 0.224, 0.225])]))
data_test = CIFAR10(dataset_path,
                    train=False,
                    download=True,
                    transform=transforms.Compose([
                        transforms.Resize((224, 224)),
                        transforms.ToTensor(),
                        transforms.Normalize([0.485, 0.456, 0.406],
                                             [0.229, 0.224, 0.225])]))
data_train_loader = DataLoader(data_train, batch_size=32, shuffle=True,
                               num_workers=1)
data_test_loader = DataLoader(data_test, batch_size=32, num_workers=1)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = VGG16(device, 10)
net.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-5)


def train(epoch):
    global cur_batch_win
    net.train()
    loss_list, batch_list = [], []
    tic = time.time()
    for i, (images, labels) in enumerate(data_train_loader):
        # move the images to the device too (the original only moved labels)
        output = net(images.to(device))
        loss = criterion(output, labels.to(device))
        loss_list.append(loss.detach().cpu().item())
import os

import cv2

from vgg import VGG16

os.environ['CUDA_VISIBLE_DEVICES'] = '0'

VGG16_model = VGG16(model_path=MODEL_PATH,
                    classes_names_path=CLASSES_NAMES_PATH)

# ** predict
image = cv2.imread(IMAGE_PATH)
print(VGG16_model.infer(image))

# ** batch predict
image_list = []
for image_name in os.listdir(FOLDER_PATH):
    image = cv2.imread(os.path.join(FOLDER_PATH, image_name))
    image_list.append(image)
print(VGG16_model.batch_infer(image_list))
def train(args):
    print("Start Time:\t{}".format(time.ctime()))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_dataset = datasets.ImageFolder(
        args.datapath,
        transforms.Compose([
            transforms.Resize(args.image_size),
            transforms.CenterCrop(args.image_size),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.mul(255))
        ]))
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    vgg = VGG16().to(device)
    transformer = Transformer(norm='instance', padding='reflect').to(device)
    optimizer = Adam(transformer.parameters(), args.learning_rate)
    mse_loss = torch.nn.MSELoss()

    style_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Lambda(lambda x: x.mul(255))])

    # allows for multiple styles: load each style path separately
    style_images = args.stylepath.split(',')
    num_style = len(style_images)
    styles = [load_image(path, size=args.style_size)
              for path in style_images]
    styles = [style_transform(style) for style in styles]
    # This needs to match n_batch: repeat each style so its Gram matrices
    # line up with the content features
    styles = [
        style.repeat(args.batch_size, 1, 1, 1).to(device) for style in styles
    ]
    # the output of vgg is a dictionary {"relu1": tensor, "relu2": tensor, ...}
    feature_styles = [vgg(normalize(style)).values() for style in styles]
    gram_style = [
        gram_matrix(y) for feature_style in feature_styles
        for y in feature_style
    ]

    flag = True  # used for alternating which style image to use
    loss = []
    run_time = time.strftime("%d-%H-%M-%S")
    for epoch_num in range(args.epochs):
        transformer.train()
        agg_content_loss = 0.0
        agg_style_loss = 0.0
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()

            y_content = x.to(device)
            y_hat = transformer(y_content)
            y_content = normalize(y_content)
            y_hat = normalize(y_hat)
            features_yc = vgg(y_content)
            features_yh = vgg(y_hat)

            content_loss = args.content_weight * mse_loss(
                features_yh["relu2"], features_yc["relu2"])

            # Gram matrix for each transformer network output
            gram_yh = [
                gram_matrix(feature) for feature in features_yh.values()
            ]
            # entries adjusted for the number of styles:
            # <--------style1--------> <--------style2-------->  ...  style_n
            # [relu1, relu2, relu3, relu4, relu1, relu2, relu3, relu4, ...]
            gram_yh = [thing for _ in range(num_style) for thing in gram_yh]

            # Calculate the style loss, one entry per (style, layer) pair
            style_loss = [
                mse_loss(G_yh, G_style[:n_batch, :, :]).cpu()
                for G_yh, G_style in zip(gram_yh, gram_style)
            ]

            # Interpolate between two styles
            if args.alpha is not None and num_style == 2:
                alpha_list = [args.alpha for _ in range(4)]       # alpha * first image
                alpha_list += [1 - args.alpha for _ in range(4)]  # (1-alpha) * second image
                style_loss = [
                    alpha * loss_term
                    for loss_term, alpha in zip(style_loss, alpha_list)
                ]

            # Alternate the style image losses for training purposes
            if args.alt is not None:
                if count % (4 * args.alt) == 0:
                    flag = not flag
                if flag:
                    style_loss = sum(style_loss[:4])  # first image
                else:
                    style_loss = sum(style_loss[4:])  # second image
            else:
                style_loss = sum(style_loss) / num_style  # both images

            style_loss *= args.style_weight

            total_loss = content_loss + style_loss
            total_loss.backward()
            optimizer.step()

            agg_content_loss += content_loss.item()
            agg_style_loss += style_loss.item()

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "[{}/{}]\tTotal: {:.2f}\tStyle: {:.2f}\tContent: {:.2f}".format(
                    count, len(train_dataset),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    agg_content_loss / (batch_id + 1))
                print(mesg, end='\r')
                loss.append([
                    batch_id + 1,
                    agg_content_loss / (batch_id + 1),
                    agg_style_loss / (batch_id + 1),
                    (agg_content_loss + agg_style_loss) / (batch_id + 1)
                ])

            if args.checkpoint_dir is not None and (
                    batch_id + 1) % args.checkpoint_interval == 0:
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(epoch_num + 1) + \
                    "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_dir,
                                               ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()

    save_loss_plot(np.array(loss),
                   args.log_dir + '/train_loss{}.jpg'.format(run_time))

    # save the model and a parameter log when training is finished
    transformer.eval().cpu()
    if args.savename is None:
        save_model_filename = "epoch_" + str(args.epochs) + "_" + str(
            time.strftime("%d-%H-%M-%S")) + "_" + str(
            args.content_weight) + "_" + str(args.style_weight) + ".model"
    else:
        save_model_filename = args.savename
    save_model_path = os.path.join(args.save_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)

    # save the loss history in a pickle file
    with open('{}/loss{}'.format(args.log_dir, run_time), 'wb') as fp:
        pickle.dump(loss, fp)
    with open('{}/param_log{}.txt'.format(args.log_dir, run_time), 'w') as f:
        f.write("Epochs: {}\n".format(args.epochs))
        f.write("Batch Size: {}\n".format(args.batch_size))
        f.write("Dataset: {}\n".format(args.datapath))
        f.write("Style Image: {}\n".format(args.stylepath))
        f.write("Content Weight: {}\n".format(args.content_weight))
        f.write("Style Weight: {}\n".format(args.style_weight))
        f.write("Learning Rate: {}\n".format(args.learning_rate))
        if args.alpha is not None:
            f.write("Alpha: {}\n".format(args.alpha))
        if args.alt is not None:
            f.write("Alternation: {} batches".format(args.alt))
    print("\nDone, trained model saved at", save_model_path)
num_classes = 5749
train_layers = ['conv5_3', 'fc6', 'fc7', 'fc8']
display_step = 128

filewriter_path = "/finetune_alexnet/dogs_vs_cats"
checkpoint_path = "/finetune_alexnet/"
if not os.path.isdir(checkpoint_path):
    os.mkdir(checkpoint_path)

x = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)

model = VGG16(x, keep_prob, num_classes, train_layers)
score = model.fc8

# only fine-tune the variables belonging to train_layers
var_list = [
    v for v in tf.trainable_variables()
    if v.name.split('/')[0] in train_layers
]

with tf.name_scope("cross_ent"):
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=score, labels=y))

with tf.name_scope("train"):
    gradients = tf.gradients(loss, var_list)
    gradients = list(zip(gradients, var_list))
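# A hedged continuation sketch: the (gradient, variable) pairs built above are
# what a tf.train optimizer consumes via apply_gradients; the optimizer choice
# and learning rate here are placeholders, not the original script's values.
with tf.name_scope("train"):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    train_op = optimizer.apply_gradients(grads_and_vars=gradients)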
    t_img = transforms.ToTensor()(img)
    t_img = normalize_mean(
        t_img,
        (104.00698793 / 255, 116.66876762 / 255, 122.67891434 / 255))
    return t_img


if __name__ == '__main__':
    dataset_dir = '/data8T/ycf/project/data/Paris/paris/*/*'
    out_dir = '/data8T/ycf/project/IRS/feas/crow/features/'
    query_out_dir = '/data8T/ycf/project/IRS/feas/crow/query_features/'

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        print('OK!')

    net = VGG16(pretrained=True)
    net.to(device)
    net.eval()

    query_lists = load_query_lists()
    for img in glob.glob(dataset_dir):
        base_img = os.path.basename(img)
        is_query = base_img.split('.jpg')[0] in query_lists
        image = load_img(img)
        if image is None:
            print('err: ' + img)
            continue
        print(img)
        tensor_image = format_img_for_vgg(image)
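# A hedged, simplified sketch of the cross-dimensional weighted sum-pooling
# that the "crow" feature paths above suggest (in the spirit of CroW,
# Kalantidis et al.); the original paper's normalization details differ, and
# this assumes ReLU (non-negative) conv activations. `fmap` is a (C, H, W)
# numpy feature map.
import numpy as np

def crow_aggregate(fmap, eps=1e-8):
    C = fmap.shape[0]
    # spatial weights: normalized per-location sum over channels
    S = fmap.sum(axis=0)
    S = np.sqrt(S / (np.linalg.norm(S) + eps))
    # channel weights: boost sparse channels (low proportion of nonzeros)
    Q = (fmap > 0).reshape(C, -1).mean(axis=1)
    w = np.log(Q.sum() / (Q + eps) + eps)
    # weighted sum-pooling, then L2-normalize the descriptor
    feat = (fmap * S[None, :, :]).reshape(C, -1).sum(axis=1) * w
    return feat / (np.linalg.norm(feat) + eps)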
def wa_sgd_run(rank, args):
    global best_acc1
    if rank == 0:
        print("wa_sgd",
              "average weight" if args.ave_weight else "average gradients")
        if args.adjlr:
            writer = SummaryWriter(
                log_dir=args.logdir,
                comment='wa_sgd_ave_w_vgg16_adjlr{:.3f}_m{:.2f}'.format(
                    args.lr, args.momentum) if args.ave_weight else
                'wa_sgd_ave_g_vgg16_adjlr{:.3f}_m{:.2f}'.format(
                    args.lr, args.momentum))
        else:
            writer = SummaryWriter(
                log_dir=args.logdir,
                comment='wa_sgd_ave_w_vgg16_lr{:.3f}_m{:.2f}'.format(
                    args.lr, args.momentum) if args.ave_weight else
                'wa_sgd_ave_g_vgg16_lr{:.3f}_m{:.2f}'.format(
                    args.lr, args.momentum))

    # init the process group
    if args.gpus is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = ",".join(args.gpus)
    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url,
                            world_size=args.world_size,
                            rank=rank)

    args.batch_size = int(args.batch_size / args.world_size)
    device = torch.device("cuda:{}".format(rank))
    print("batchsize ", args.batch_size, " rank ", rank, " device ", device)

    model = VGG16(num_classes=args.classes).to(device)
    #model = ResNet50(num_classes=args.classes).to(device)
    #model = models.resnet50(num_classes=args.classes).to(device)
    #model = models.vgg16_bn(num_classes=args.classes).to(device)
    #model = torch.nn.parallel.DistributedDataParallel(model)

    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.SGD(model.parameters(), args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    #optimizer = optim.SGD(model.parameters(), args.lr)
    #optimizer = optim.SGD(model.parameters(), args.lr, momentum=args.momentum)

    # open cudnn benchmark
    #cudnn.benchmark = True

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
        train_dir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset)
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers,
        pin_memory=True,
        sampler=train_sampler)
    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(
            val_dir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ])),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    acc_red = torch.zeros(1).to(device)
    acum_time = 0.0
    for epoch in range(0, args.epochs):
        train_sampler.set_epoch(epoch)
        if args.adjlr:
            adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        batch_time = train(device, train_loader, model, criterion, optimizer,
                           epoch, args)
        acum_time += batch_time

        # evaluate on the validation set
        acc1 = validate(device, val_loader, model, criterion, args)

        # average acc1 across workers
        acc_red[0] = acc1
        dist.reduce(tensor=acc_red, dst=0, op=dist.ReduceOp.SUM)
        acc_red.div_(args.world_size * 1.0)

        # remember the best acc@1 and save a checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        #print("final best acc of epoch %d : %f" % (epoch, best_acc1))
        if rank == 0:
            print("==> acc1 ", acc_red[0].item())
            writer.add_scalar('test acc1 ', acc_red[0].item(), epoch)
            writer.add_scalar('acc1 over time(0.1s)', acc_red[0].item(),
                              int(acum_time * 10))