def main(): global args print "Loading training set and testing set..." train_set = visual_genome(args.dataset_option, 'train') test_set = visual_genome('small', 'test') print "Done." train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True) test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) net = RPN(not args.use_normal_anchors) if args.resume_training: print 'Resume training from: {}'.format(args.resume_model) if len(args.resume_model) == 0: raise Exception('[resume_model] not specified') network.load_net(args.resume_model, net) optimizer = torch.optim.SGD([ {'params': list(net.parameters())[26:]}, ], lr=args.lr, momentum=args.momentum, weight_decay=0.0005) else: print 'Training from scratch...Initializing network...' optimizer = torch.optim.SGD(list(net.parameters())[26:], lr=args.lr, momentum=args.momentum, weight_decay=0.0005) network.set_trainable(net.features, requires_grad=False) net.cuda() if not os.path.exists(args.output_dir): os.mkdir(args.output_dir) best_recall = 0.0 for epoch in range(0, args.max_epoch): # Training # train(train_loader, net, optimizer, epoch) # Testing recall, RPN_precision, RPN_recall = test(test_loader, net) print('Epoch[{epoch:d}]: ' 'Recall: ' 'object: {recall: .3f}%% (Best: {best_recall: .3f}%%)'.format( epoch = epoch, recall=recall * 100, best_recall=best_recall * 100)) print('object: {precision: .3f}%% ' 'object: {recall: .3f}%% '.format(precision=RPN_precision*100, recall=RPN_recall*100)) # update learning rate if epoch % args.step_size == 0: args.disable_clip_gradient = True args.lr /= 10 for param_group in optimizer.param_groups: param_group['lr'] = args.lr save_name = os.path.join(args.output_dir, '{}_epoch_{}.h5'.format(args.model_name, epoch)) network.save_net(save_name, net) print('save model: {}'.format(save_name)) if np.all(recall > best_recall): best_recall = recall save_name = 
os.path.join(args.output_dir, '{}_best.h5'.format(args.model_name, epoch)) network.save_net(save_name, net)
def main(): global args print "Loading training set and testing set..." # train_set = visual_genome(args.dataset_option, 'train') test_set = visual_genome('small', 'test') print "Done." # train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True) test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) net = RPN(args.use_normal_anchors) network.load_net('./output/RPN/RPN_region_best.h5', net) # network.set_trainable(net.features, requires_grad=False) net.cuda() best_recall = np.array([0.0, 0.0]) # Testing recall = test(test_loader, net) print( 'Recall: ' 'object: {recall[0]: .3f}%% (Best: {best_recall[0]: .3f}%%)' 'relationship: {recall[1]: .3f}%% (Best: {best_recall[1]: .3f}%%)'. format(recall=recall * 100, best_recall=best_recall * 100))
def main(): global args print "Loading testing set..." # train_set = visual_genome(args.dataset_option, 'train') test_set = visual_genome('small', 'test') print "Done." test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) net = RPN(args.use_kmeans_anchors) network.load_net('./output/RPN/RPN_relationship_best_kmeans.h5', net) net.cuda() # best_recall = np.array([0.0, 0.0]) # Testing recall = test(test_loader, net) print( 'Recall: ' 'object: {recall[0]: .3f}%' 'relationship: {recall[1]: .3f}%'.format(recall=recall * 100))
def main(): global args print "Loading training set and testing set..." # train_set = visual_genome(args.dataset_option, 'train') test_set = visual_genome('small', 'test') object_classes = test_set.object_classes print "Done." # train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True) test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) net = FasterRCNN(use_kmeans_anchors=args.use_kmeans_anchors, n_classes=len(object_classes), model=args.base_model) network.load_net('./output/detection/Faster_RCNN_small_vgg_12epoch_epoch_11.h5', net) # network.load_net('./output/detection/RPN_object1_best.h5', net) # network.set_trainable(net.features, requires_grad=False) net.cuda() # Testing recall = test(test_loader, net) print('Recall: ' 'object: {recall: .3f}%'.format(recall=recall*100))
def main(): global args, optimizer_select # To set the model name automatically print args lr = args.lr args = get_model_name(args) print 'Model name: {}'.format(args.model_name) # To set the random seed random.seed(args.seed) torch.manual_seed(args.seed + 1) torch.cuda.manual_seed(args.seed + 2) print("Loading training set and testing set..."), train_set = visual_genome(args.dataset_option, 'train') test_set = visual_genome('small', 'test') print("Done.") train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True) test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) # Model declaration net = Hierarchical_Descriptive_Model( nhidden=args.mps_feature_len, n_object_cats=train_set.num_object_classes, n_predicate_cats=train_set.num_predicate_classes, n_vocab=train_set.voc_size, voc_sign=train_set.voc_sign, max_word_length=train_set.max_size, MPS_iter=args.MPS_iter, use_language_loss=not args.disable_language_model, object_loss_weight=train_set.inverse_weight_object, predicate_loss_weight=train_set.inverse_weight_predicate, dropout=args.dropout, use_kmeans_anchors=not args.use_normal_anchors, gate_width=args.gate_width, nhidden_caption=args.nhidden_caption, nembedding=args.nembedding, rnn_type=args.rnn_type, rnn_droptout=args.caption_use_dropout, rnn_bias=args.caption_use_bias, use_region_reg=args.region_bbox_reg, use_kernel=args.use_kernel_function) params = list(net.parameters()) for param in params: print param.size() print net # To group up the features vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features = group_features( net) # Setting the state of the training model net.cuda() net.train() logger_path = "log/logger/{}".format(args.model_name) if os.path.exists(logger_path): shutil.rmtree(logger_path) configure(logger_path, flush_secs=5) # setting up the logger network.set_trainable(net, False) # 
network.weights_normal_init(net, dev=0.01) if args.finetune_language_model: print 'Only finetuning the language model from: {}'.format( args.resume_model) args.train_all = False if len(args.resume_model) == 0: raise Exception('[resume_model] not specified') network.load_net(args.resume_model, net) optimizer_select = 3 elif args.load_RPN: print 'Loading pretrained RPN: {}'.format(args.saved_model_path) args.train_all = False network.load_net(args.saved_model_path, net.rpn) net.reinitialize_fc_layers() optimizer_select = 1 elif args.resume_training: print 'Resume training from: {}'.format(args.resume_model) if len(args.resume_model) == 0: raise Exception('[resume_model] not specified') network.load_net(args.resume_model, net) args.train_all = True optimizer_select = 2 else: print 'Training from scratch.' net.rpn.initialize_parameters() net.reinitialize_fc_layers() optimizer_select = 0 args.train_all = True optimizer = network.get_optimizer(lr, optimizer_select, args, vgg_features_var, rpn_features, hdn_features, language_features) target_net = net if not os.path.exists(args.output_dir): os.mkdir(args.output_dir) top_Ns = [50, 100] best_recall = np.zeros(len(top_Ns)) if args.evaluate: recall = test(test_loader, net, top_Ns) print('======= Testing Result =======') for idx, top_N in enumerate(top_Ns): print( '[Recall@{top_N:d}] {recall:2.3f}%% (best: {best_recall:2.3f}%%)' .format(top_N=top_N, recall=recall[idx] * 100, best_recall=best_recall[idx] * 100)) print('==============================') else: for epoch in range(0, args.max_epoch): # Training train(train_loader, target_net, optimizer, epoch) # snapshot the state save_name = os.path.join( args.output_dir, '{}_epoch_{}.h5'.format(args.model_name, epoch)) network.save_net(save_name, net) print('save model: {}'.format(save_name)) # Testing # network.set_trainable(net, False) # Without backward(), requires_grad takes no effect recall = test(test_loader, net, top_Ns) if np.all(recall > best_recall): best_recall = 
recall save_name = os.path.join(args.output_dir, '{}_best.h5'.format(args.model_name)) network.save_net(save_name, net) print('\nsave model: {}'.format(save_name)) print('Epoch[{epoch:d}]:'.format(epoch=epoch)), for idx, top_N in enumerate(top_Ns): print( '\t[Recall@{top_N:d}] {recall:2.3f}%% (best: {best_recall:2.3f}%%)' .format(top_N=top_N, recall=recall[idx] * 100, best_recall=best_recall[idx] * 100)), # updating learning policy if epoch % args.step_size == 0 and epoch > 0: lr /= 10 args.lr = lr print '[learning rate: {}]'.format(lr) args.enable_clip_gradient = False if not args.finetune_language_model: args.train_all = True optimizer_select = 2 # update optimizer and correponding requires_grad state optimizer = network.get_optimizer(lr, optimizer_select, args, vgg_features_var, rpn_features, hdn_features, language_features)
def main(): global args, optimizer_select # To set the model name automatically print args lr = args.lr args = get_model_name(args) print 'Model name: {}'.format(args.model_name) # To set the random seed random.seed(args.seed) torch.manual_seed(args.seed + 1) torch.cuda.manual_seed(args.seed + 2) print("Loading training set and testing set...") train_set = visual_genome(args.dataset_option, 'train') test_set = visual_genome(args.dataset_option, 'test') print("Done.") train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True) test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True) net = Hierarchical_Descriptive_Model( nhidden=args.mps_feature_len, n_object_cats=train_set.num_object_classes, n_predicate_cats=train_set.num_predicate_classes, MPS_iter=args.MPS_iter, object_loss_weight=train_set.inverse_weight_object, predicate_loss_weight=train_set.inverse_weight_predicate, dropout=args.dropout, use_kmeans_anchors=args.use_kmeans_anchors, base_model=args.base_model) #True # params = list(net.parameters()) # for param in params: # print param.size() print net # Setting the state of the training model net.cuda() net.train() network.set_trainable(net, False) # network.weights_normal_init(net, dev=0.01) if args.resume_model: print 'Resume training from: {}'.format(args.HDN_model) if len(args.HDN_model) == 0: raise Exception('[resume_model] not specified') network.load_net(args.HDN_model, net) # network.load_net(args.RPN_model, net.rpn) args.train_all = True optimizer_select = 3 elif args.load_RCNN: print 'Loading pretrained RCNN: {}'.format(args.RCNN_model) args.train_all = False network.load_net(args.RCNN_model, net.rcnn) optimizer_select = 2 elif args.load_RPN: print 'Loading pretrained RPN: {}'.format(args.RPN_model) args.train_all = False network.load_net(args.RPN_model, net.rpn) net.reinitialize_fc_layers() optimizer_select = 1 else: print 'Training from 
scratch.' net.rpn.initialize_parameters() net.reinitialize_fc_layers() optimizer_select = 0 args.train_all = True # To group up the features # vgg_features_fix, vgg_features_var, rpn_features, hdn_features = group_features(net) basenet_features, rpn_features, rcnn_feature, hdn_features = group_features( net) optimizer = network.get_optimizer(lr, optimizer_select, args, basenet_features, rpn_features, rcnn_feature, hdn_features) target_net = net if not os.path.exists(args.output_dir): os.mkdir(args.output_dir) top_Ns = [50, 100] best_recall = np.zeros(len(top_Ns)) if args.evaluate: recall = test(test_loader, target_net, top_Ns, train_set.object_classes) print('======= Testing Result =======') for idx, top_N in enumerate(top_Ns): print( '[Recall@{top_N:d}] {recall:2.3f}%% (best: {best_recall:2.3f}%%)' .format(top_N=top_N, recall=recall[idx] * 100, best_recall=best_recall[idx] * 100)) print('==============================') else: for epoch in range(0, args.max_epoch): # Training train(train_loader, target_net, optimizer, epoch) # snapshot the state save_name = os.path.join( args.output_dir, '{}_epoch_{}.h5'.format(args.model_name, epoch)) network.save_net(save_name, net) print('save model: {}'.format(save_name)) recall = test(test_loader, target_net, top_Ns, train_set.object_classes) if np.all(recall > best_recall): best_recall = recall save_name = os.path.join(args.output_dir, '{}_best.h5'.format(args.model_name)) network.save_net(save_name, net) print('\nsave model: {}'.format(save_name)) print('Epoch[{epoch:d}]:'.format(epoch=epoch)), for idx, top_N in enumerate(top_Ns): print( '\t[Recall@{top_N:d}] {recall:2.3f}%% (best: {best_recall:2.3f}%%)' .format(top_N=top_N, recall=recall[idx] * 100, best_recall=best_recall[idx] * 100)) # updating learning policy if (epoch + 1) % args.step_size == 0 or (epoch + 1) % ( args.step_size + 2) == 0: lr /= 10 args.lr = lr print '[learning rate: {}]'.format(lr) args.enable_clip_gradient = False args.train_all = False 
optimizer_select = 2 # update optimizer and correponding requires_grad state optimizer = network.get_optimizer(lr, optimizer_select, args, basenet_features, rpn_features, rcnn_feature, hdn_features)
def __init__(self, encoding, splits = [10,100,500, 1000,3000, 5000, 10000, 15000], prediction_type = "max_max", ngpus = 1, cnn_type = "faster_rcnn"):
    """Build the CRF baseline: image transforms, a CNN backbone, and the
    verb-role-noun potential layers, with verb-role pairs bucketed into
    `splits` by their number of candidate nouns.

    NOTE(review): `splits` is a mutable default argument; it is only read
    here, but callers should not mutate it.
    """
    super(baseline_crf, self).__init__()

    # Standard ImageNet normalization plus train/dev-time crop pipelines.
    self.normalize = tv.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
    self.train_transform = tv.transforms.Compose([
        tv.transforms.Scale(224),
        tv.transforms.RandomCrop(224),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.ToTensor(),
        self.normalize,
    ])
    self.dev_transform = tv.transforms.Compose([
        tv.transforms.Scale(224),
        tv.transforms.CenterCrop(224),
        tv.transforms.ToTensor(),
        self.normalize,
    ])

    self.broadcast = []                 # per-GPU copies of the v->vr index
    self.nsplits = len(splits)
    self.splits = splits
    self.encoding = encoding
    self.prediction_type = prediction_type
    self.n_verbs = encoding.n_verbs()
    self.split_vr = {}                  # split index -> list of vr ids
    self.v_roles = {}                   # verb id -> its role entries

    train_set = visual_genome('srl', 'train')

    # cnn: select the backbone by name.
    print cnn_type
    if cnn_type == "resnet_101":
        self.cnn = resnet_modified_large()
    elif cnn_type == "resnet_50":
        self.cnn = resnet_modified_medium()
    elif cnn_type == "resnet_34":
        self.cnn = resnet_modified_small()
    elif cnn_type == "faster_rcnn":
        self.cnn = faster_rcnn(nhidden=1024,
                               n_object_cats=train_set.num_object_classes,
                               n_predicate_cats=train_set.num_predicate_classes,
                               n_vocab=train_set.voc_size,
                               voc_sign=train_set.voc_sign,
                               max_word_length=train_set.max_size,
                               MPS_iter=1,
                               use_language_loss=False,
                               object_loss_weight=train_set.inverse_weight_object,
                               predicate_loss_weight=train_set.inverse_weight_predicate,
                               )  # for dense vsrl
    else:
        print "unknown base network"
        exit()
    self.rep_size = self.cnn.rep_size()

    for s in range(0, len(splits)):
        self.split_vr[s] = []

    # sort by length: pair each verb-role id with its number of nouns.
    remapping = []
    for (vr, ns) in encoding.vr_id_n.items():
        remapping.append((vr, len(ns)))

    # find the right split: first bucket whose capacity covers the count.
    for (vr, l) in remapping:
        i = 0
        for s in splits:
            if l <= s:
                break
            i += 1
        _id = (i, vr)
        self.split_vr[i].append(_id)

    total = 0
    for (k, v) in self.split_vr.items():
        # print "{} {} {}".format(k, len(v), splits[k]*len(v))
        total += splits[k] * len(v)
    # print "total compute : {}".format(total)

    # keep the splits sorted by vr id, to keep the model const w.r.t the
    # encoding; record each entry as (split, position-in-split, vr).
    for i in range(0, len(splits)):
        s = sorted(self.split_vr[i], key=lambda x: x[1])
        self.split_vr[i] = []
        # enumerate?
        for (x, vr) in s:
            _id = (x, len(self.split_vr[i]), vr)
            self.split_vr[i].append(_id)
            (v, r) = encoding.id_vr[vr]
            if v not in self.v_roles:
                self.v_roles[v] = []
            self.v_roles[v].append(_id)

    # create the mapping for grouping the roles back to the verbs later
    max_roles = encoding.max_roles()

    # need a list that is nverbs by max_roles
    self.v_vr = [0 for i in range(0, self.encoding.n_verbs() * max_roles)]

    # cumulative offset of each split inside the flattened vrn output.
    splits_offset = []
    for i in range(0, len(splits)):
        if i == 0:
            splits_offset.append(0)
        else:
            splits_offset.append(splits_offset[-1] + len(self.split_vr[i - 1]))

    # and we need to compute the position of the corresponding roles,
    # and pad with the 0 symbol
    for i in range(0, self.encoding.n_verbs()):
        offset = max_roles * i
        roles = sorted(self.v_roles[i], key=lambda x: x[2])  # stored in role order
        self.v_roles[i] = roles
        k = 0
        for (s, pos, r) in roles:
            # add one to account of the 0th element being the padding
            self.v_vr[offset + k] = splits_offset[s] + pos + 1
            k += 1
        # pad
        while k < max_roles:
            self.v_vr[offset + k] = 0
            k += 1

    gv_vr = Variable(torch.LongTensor(self.v_vr).cuda())  # .view(self.encoding.n_verbs(), -1)
    # One copy of the index per GPU for broadcast-style lookup.
    for g in range(0, ngpus):
        self.broadcast.append(Variable(torch.LongTensor(self.v_vr).cuda(g)))
    self.v_vr = gv_vr
    # print self.v_vr

    # verb potential
    self.linear_v = nn.Linear(self.rep_size, self.encoding.n_verbs())
    # verb-role-noun potentials: one linear head per split bucket.
    self.linear_vrn = nn.ModuleList([
        nn.Linear(self.rep_size, splits[i] * len(self.split_vr[i]))
        for i in range(0, len(splits))])
    self.total_vrn = 0
    for i in range(0, len(splits)):
        self.total_vrn += splits[i] * len(self.split_vr[i])
    print "total encoding vrn : {0}, with padding in {1} groups : {2}".format(encoding.n_verbrolenoun(), self.total_vrn, len(splits))

    # initilize everything
    initLinear(self.linear_v)
    for _l in self.linear_vrn:
        initLinear(_l)
    self.mask_args()
def main(): global args print "Loading training set and testing set..." train_set = visual_genome(args.dataset_option, 'train') test_set = visual_genome(args.dataset_option, 'test') object_classes = test_set.object_classes print "Done." train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True) test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True) net = FasterRCNN(nhidden=args.mps_feature_len, use_kmeans_anchors=args.use_kmeans_anchors, n_classes=len(object_classes), model=args.base_model) if args.resume_model: print 'Resume training from: {}'.format(args.resume_model) if len(args.resume_model) == 0: raise Exception('[resume_model] not specified') network.load_net(args.detection_model, net) # optimizer = torch.optim.SGD([ # {'params': list(net.parameters())}, # ], lr=args.lr, momentum=args.momentum, weight_decay=0.0005) else: print 'Training from scratch...Initializing network...' 
optimizer = torch.optim.SGD(list(net.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=0.0005) # network.set_trainable(net.features, requires_grad=True) net.cuda() if not os.path.exists(args.output_dir): os.makedirs(args.output_dir) best_map = 0.0 for epoch in range(0, args.max_epoch): # Training train(train_loader, net, optimizer, epoch) # update learning rate if epoch % args.step_size == args.step_size - 1: args.clip_gradient = False args.lr /= 5 for param_group in optimizer.param_groups: param_group['lr'] = args.lr save_name = os.path.join( args.output_dir, '{}_epoch_{}.h5'.format(args.model_name, epoch)) network.save_net(save_name, net) print('save model: {}'.format(save_name)) try: # Testing map = evaluate(test_loader, net, object_classes) print( 'Epoch[{epoch:d}]: ' 'Recall: ' 'object: {map: .3f}%% (Best: {best_map: .3f}%%)'.format( epoch=epoch, map=map * 100, best_map=best_map * 100)) if map > best_map: best_map = map save_name = os.path.join( args.output_dir, '{}_best.h5'.format(args.model_name, epoch)) network.save_net(save_name, net) except: continue
def __init__(self):
    """Set up the scene-graph ROS node: load the trained HDN model for
    inference and wire up the synchronized RGB/depth subscribers and the
    scene-graph publisher.
    """
    # To set the model name automatically
    args = parser.parse_args()
    print args
    args = get_model_name(args)
    print 'Model name: {}'.format(args.model_name)
    self.check = True

    # To set the random seed
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    print("Loading training params"),
    # The 'normal' train split is loaded here only to supply vocabulary /
    # class statistics for model construction (no training happens).
    self.train_set = visual_genome('normal', 'train')
    print("Done.")
    self.train_loader = torch.utils.data.DataLoader(self.train_set,
                                                    batch_size=1,
                                                    shuffle=True,
                                                    num_workers=8,
                                                    pin_memory=True)
    end = time.time()  # start of the model-loading timer

    # Model declaration
    self.net = Hierarchical_Descriptive_Model(
        nhidden=args.mps_feature_len,
        n_object_cats=self.train_set.num_object_classes,
        n_predicate_cats=self.train_set.num_predicate_classes,
        n_vocab=self.train_set.voc_size,
        voc_sign=self.train_set.voc_sign,
        max_word_length=self.train_set.max_size,
        MPS_iter=args.MPS_iter,
        use_language_loss=not args.disable_language_model,
        object_loss_weight=self.train_set.inverse_weight_object,
        predicate_loss_weight=self.train_set.inverse_weight_predicate,
        dropout=args.dropout,
        use_kmeans_anchors=not args.use_normal_anchors,
        gate_width=args.gate_width,
        nhidden_caption=args.nhidden_caption,
        nembedding=args.nembedding,
        rnn_type=args.rnn_type,
        rnn_droptout=args.caption_use_dropout,
        rnn_bias=args.caption_use_bias,
        use_region_reg=args.region_bbox_reg,
        use_kernel=args.use_kernel_function)
    params = list(self.net.parameters())
    for param in params:
        print param.size()
    print self.net

    # To group up the features
    vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features = group_features(
        self.net)

    # Setting the state of the training model
    self.net.cuda()
    self.net.train()
    network.set_trainable(self.net, False)

    # loading model for inference
    print 'Resume training from: {}'.format(args.resume_model)
    if len(args.resume_model) == 0:
        raise Exception('[resume_model] not specified')
    network.load_net(args.resume_model, self.net)
    args.train_all = True
    optimizer_select = 2
    # NOTE(review): this optimizer and `target_net` are built but never
    # stored on `self` — presumably leftovers from the training script.
    optimizer = network.get_optimizer(args.lr, optimizer_select, args,
                                      vgg_features_var, rpn_features,
                                      hdn_features, language_features)
    target_net = self.net
    self.net.eval()
    # NOTE(review): under Python 2 this print statement emits a tuple repr.
    print('Model Loading time: ', time.time() - end)

    # Set topics: ROS image bridge, graph renderers, synchronized RGB/depth
    # subscribers and the scene-graph publisher.
    self.bridge = CvBridge()
    self.dot = Digraph(comment='warehouse', format='svg')
    self.regions_dot = Digraph(comment='regions', format='svg')
    self.image_sub = message_filters.Subscriber(
        '/turtlebot2i/camera/rgb/raw_image', Image)
    self.image_depth_sub = message_filters.Subscriber(
        '/turtlebot2i/camera/depth/raw_image', Image)
    self.ts = message_filters.TimeSynchronizer(
        [self.image_sub, self.image_depth_sub], queue_size=1)
    print('calling callback')
    self.ts.registerCallback(self.callback)
    self.scenegraph_pub = rospy.Publisher('/turtlebot2i/scene_graph',
                                          SceneGraph, queue_size=10)