# NOTE(review): fragment of a training script — the `if conf.model == ...` chain
# begins before this chunk, so the opening branches are not visible here.
elif conf.model == 'shz_fusion':
    from lib.shz_models.rel_model_fusion import RelModel
elif conf.model == 'shz_fusion_beta':
    from lib.shz_models.rel_model_fusion_beta import RelModel
# --
else:
    # Unrecognized model name; no message is attached to the error.
    raise ValueError()

# -- Create Tensorboard summary writer
# Run name is derived from the last path component of the save directory.
writer = SummaryWriter(comment='_run#' + conf.save_dir.split('/')[-1])

# -- Create dataset splits and dataset loader
# Non-overlapping boxes are filtered only in 'sgdet' mode.
train, val, _ = VG.splits(
    num_val_im=conf.val_size,
    filter_duplicate_rels=True,
    use_proposals=conf.use_proposals,
    filter_non_overlap=conf.mode == 'sgdet',
    # -- Depth dataset parameters
    # `three_channels_depth` presumably replicates depth to 3 channels for a
    # pretrained backbone — TODO confirm in the VG dataset implementation.
    use_depth=conf.load_depth,
    three_channels_depth=conf.pretrained_depth)
train_loader, val_loader = VGDataLoader.splits(
    train, val,
    mode='rel',
    batch_size=conf.batch_size,
    num_workers=conf.num_workers,
    num_gpus=conf.num_gpus,
    # -- Depth dataset parameters
    use_depth=conf.load_depth)

# -- Create the specified Relation-Detection model
# NOTE(review): fragment of an evaluation script; the `RelModel(...)` call at the
# end continues beyond this chunk.
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator
from tqdm import tqdm
from config import BOX_SCALE, IM_SCALE
import dill as pkl
import os

# Parse command-line configuration.
conf = ModelConfig()

# Select the relation model implementation by name.
if conf.model == 'motifnet':
    from lib.rel_model import RelModel
elif conf.model == 'stanford':
    from lib.rel_model_stanford import RelModelStanford as RelModel
else:
    raise ValueError()

train, val, test = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True,
                             use_proposals=conf.use_proposals,
                             filter_non_overlap=conf.mode == 'sgdet')
# Evaluate on the test split when -test is given; note that -train, checked
# second, overrides -test if both flags are set.
if conf.test:
    val = test
if conf.train:
    val = train
train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)
# Build the relation detector from the training split's label vocabularies.
# (Argument list continues past this fragment.)
detector = RelModel(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
                    num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
                    use_resnet=conf.use_resnet, order=conf.order,
                    nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
                    use_proposals=conf.use_proposals,
""" SCRIPT TO MAKE MEMES. this was from an old version of the code, so it might require some fixes to get working. """ from dataloaders.visual_genome import VG # import matplotlib # # matplotlib.use('Agg') from tqdm import tqdm import seaborn as sns import numpy as np from lib.fpn.box_intersections_cpu.bbox import bbox_overlaps from collections import defaultdict train, val, test = VG.splits(filter_non_overlap=False, num_val_im=2000) count_threshold = 50 pmi_threshold = 10 o_type = [] f = open("object_types.txt") for line in f.readlines(): tabs = line.strip().split("\t") t = tabs[1].split("_")[0] o_type.append(t) r_type = [] f = open("relation_types.txt") for line in f.readlines(): tabs = line.strip().split("\t") t = tabs[1].split("_")[0] r_type.append(t)
def main():
    """Run predcls evaluation with a fixed motifnet config and save scene-graph
    visualizations (image, ground-truth graph, predicted graph) for 100
    randomly selected examples."""
    # Hard-coded CLI arguments are injected via sys.argv before ModelConfig()
    # parses them; the leading 'X' stands in for the program name.
    args = 'X -m predcls -model motifnet -order leftright -nl_obj 2 -nl_edge 4 -b 6 -clip 5 -p 100 -hidden_dim 512 -pooling_dim 4096 -lr 1e-3 -ngpu 1 -test -ckpt checkpoints/vgrel-motifnet-sgcls.tar -nepoch 50 -use_bias -multipred -cache motifnet_predcls1'
    sys.argv = args.split(' ')
    conf = ModelConfig()

    # Select the relation model implementation by name.
    if conf.model == 'motifnet':
        from lib.rel_model import RelModel
    elif conf.model == 'stanford':
        from lib.rel_model_stanford import RelModelStanford as RelModel
    else:
        raise ValueError()

    train, val, test = VG.splits(
        num_val_im=conf.val_size,
        filter_duplicate_rels=True,
        use_proposals=conf.use_proposals,
        filter_non_overlap=conf.mode == 'sgdet',
    )
    # With -test set (as in the hard-coded args above), evaluate on test.
    if conf.test:
        val = test
    train_loader, val_loader = VGDataLoader.splits(
        train, val, mode='rel',
        batch_size=conf.batch_size,
        num_workers=conf.num_workers,
        num_gpus=conf.num_gpus
    )

    # Build the relation detector and restore weights from the checkpoint.
    detector = RelModel(
        classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
        num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
        use_resnet=conf.use_resnet, order=conf.order,
        nl_edge=conf.nl_edge, nl_obj=conf.nl_obj,
        hidden_dim=conf.hidden_dim, use_proposals=conf.use_proposals,
        pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
        pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
        pooling_dim=conf.pooling_dim, rec_dropout=conf.rec_dropout,
        use_bias=conf.use_bias, use_tanh=conf.use_tanh,
        limit_vision=conf.limit_vision
    )
    detector.cuda()
    ckpt = torch.load(conf.ckpt)
    # Partial restore: loads whatever parameters match by name/shape.
    optimistic_restore(detector, ckpt['state_dict'])

    evaluator = BasicSceneGraphEvaluator.all_modes(multiple_preds=conf.multi_pred)

    # Load previously computed per-image recall values from a pickle file.
    mode, N = 'test.multi_pred', 20
    recs = pkl.load(open('{}.{}.pkl'.format(mode, N), 'rb'))
    np.random.seed(0)
    # sorted_idxs = np.argsort(recs)
    # Pick 100 random images and order them by their recall value.
    selected_idxs = np.random.choice(range(len(recs)), size=100, replace=False)
    sorted_idxs = selected_idxs[np.argsort(np.array(recs)[selected_idxs])]
    print('Sorted idxs: {}'.format(sorted_idxs.tolist()))

    save_dir = '/nethome/bamos/2018-intel/data/2018-07-31/sgs.multi'

    for idx in selected_idxs:
        # Ground-truth entry is copied so downstream code cannot mutate the
        # dataset's arrays.
        gt_entry = {
            'gt_classes': val.gt_classes[idx].copy(),
            'gt_relations': val.relationships[idx].copy(),
            'gt_boxes': val.gt_boxes[idx].copy(),
        }

        detector.eval()
        # Single-image forward pass; detector[...] indexing dispatches a
        # collated batch through the model.
        det_res = detector[vg_collate([test[idx]], num_gpus=1)]

        boxes_i, objs_i, obj_scores_i, rels_i, pred_scores_i = det_res

        pred_entry = {
            # Boxes are rescaled from the BOX_SCALE coordinate frame back to
            # image scale.
            'pred_boxes': boxes_i * BOX_SCALE/IM_SCALE,
            'pred_classes': objs_i,
            'pred_rel_inds': rels_i,
            'obj_scores': obj_scores_i,
            'rel_scores': pred_scores_i,
        }

        # Save the raw image plus ground-truth and predicted graph renderings.
        unique_cnames = get_unique_cnames(gt_entry, test)
        save_img(idx, recs, test, gt_entry, det_res, unique_cnames, save_dir)
        save_gt_graph(idx, test, gt_entry, det_res, unique_cnames, save_dir)
        save_pred_graph(idx, test, pred_entry, det_res, unique_cnames,
                        save_dir, multi_pred=conf.multi_pred, n_pred=20)
# NOTE(review): fragment of the object-detector pretraining script.
import pandas as pd
import time
import os
from config import ModelConfig, FG_FRACTION, RPN_FG_FRACTION, IM_SCALE, BOX_SCALE
from torch.nn import functional as F
from lib.fpn.box_utils import bbox_loss
import torch.backends.cudnn as cudnn
from pycocotools.cocoeval import COCOeval
from lib.pytorch_misc import optimistic_restore, clip_grad_norm
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Enable cuDNN autotuner (input sizes are expected to be fairly uniform).
cudnn.benchmark = True
conf = ModelConfig()

# Detection pretraining keeps all images: no overlap filtering and images
# without relations are retained.
train, val, _ = VG.splits(num_val_im=conf.val_size, filter_non_overlap=False,
                          filter_empty_rels=False, use_proposals=conf.use_proposals)
train_loader, val_loader = VGDataLoader.splits(train, val, batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)
# Train the RPN from scratch unless external proposals are supplied.
detector = ObjectDetector(
    classes=train.ind_to_classes,
    num_gpus=conf.num_gpus,
    mode='rpntrain' if not conf.use_proposals else 'proposals',
    use_resnet=conf.use_resnet)
detector.cuda()

# Note: if you're doing the stanford setup, you'll need to change this to freeze the lower layers
# NOTE(review): fragment — the `parser.add_argument(...)` call opens before
# this chunk; only its tail is visible here.
                    type=str, default='caches/kern_sgcls.pkl')
args = parser.parse_args()

# Create the output directory layout: <save_dir>/{images,graphs}.
os.makedirs(args.save_dir, exist_ok=True)
image_dir = os.path.join(args.save_dir, 'images')
graph_dir = os.path.join(args.save_dir, 'graphs')
os.makedirs(image_dir, exist_ok=True)
os.makedirs(graph_dir, exist_ok=True)

mode = 'sgcls'  # this code is only for sgcls task

# train, val, test = VG.splits(num_val_im=conf.val_size, filter_duplicate_rels=True,
#                              use_proposals=conf.use_proposals,
#                              filter_non_overlap=conf.mode == 'sgdet')
# Hard-coded split parameters (the config-driven version is kept above for
# reference); evaluation runs on the test split.
train, val, test = VG.splits(num_val_im=5000, filter_duplicate_rels=True,
                             use_proposals=False,
                             filter_non_overlap=False)
val = test
# train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
#                                                batch_size=conf.batch_size,
#                                                num_workers=conf.num_workers,
#                                                num_gpus=conf.num_gpus)
# Single-image, single-worker loading for visualization.
train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
                                               batch_size=1,
                                               num_workers=1,
                                               num_gpus=1)

# Label vocabularies for rendering predicate/class names.
ind_to_predicates = train.ind_to_predicates
ind_to_classes = train.ind_to_classes
# NOTE(review): fragment of an evaluation script for the Stanford relation
# model; the `RelModelStanford(...)` call at the end continues beyond this
# chunk. `conf` is created before this fragment.
import pandas as pd
import time
from tqdm import tqdm
from torch.nn.functional import cross_entropy as CE
from lib.pytorch_misc import *
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator, calculate_mR_from_evaluator_list, eval_entry
import pickle
from lib.rel_model_stanford import RelModelStanford

# sgdet is evaluated alone; otherwise both predcls and sgcls are evaluated.
EVAL_MODES = ['sgdet'] if conf.mode == 'sgdet' else ['predcls', 'sgcls']
assert conf.mode in EVAL_MODES, (conf.mode, 'other modes not supported')

# This VG variant returns (train, tuple-of-val-splits) rather than three
# splits; graph-size limits and zero-shot splits are supported.
train, val_splits = VG.splits(data_dir=conf.data,
                              num_val_im=conf.val_size,
                              min_graph_size=conf.min_graph_size,
                              max_graph_size=conf.max_graph_size,
                              mrcnn=conf.detector == 'mrcnn',
                              filter_non_overlap=conf.mode == 'sgdet',
                              exclude_left_right=conf.exclude_left_right)
train_loader, val_loaders = VGDataLoader.splits(train, val_splits, mode='rel',
                                                batch_size=conf.batch_size,
                                                num_workers=conf.num_workers,
                                                num_gpus=conf.num_gpus)
# Four loaders: validation, zero-shot validation, test, zero-shot test.
val_loader, val_loader_zs, test_loader, test_loader_zs = val_loaders

# Build the detector (argument list continues past this fragment).
detector = RelModelStanford(train_data=train,
                            num_gpus=conf.num_gpus,
                            mode=conf.mode,
# NOTE(review): fragment of a visualization/evaluation script; the
# `RelModel(...)` call at the end continues beyond this chunk.
import numpy as np
import torch
from config import ModelConfig
from lib.pytorch_misc import optimistic_restore
from lib.evaluation.sg_eval import BasicSceneGraphEvaluator
from tqdm import tqdm
from config import BOX_SCALE, IM_SCALE
from lib.fpn.box_utils import bbox_overlaps
from collections import defaultdict
from PIL import Image, ImageDraw, ImageFont
import os
from functools import reduce

conf = ModelConfig()
train, val, test = VG.splits(num_val_im=conf.val_size)
# Evaluate on the test split when -test is given.
if conf.test:
    val = test
train_loader, val_loader = VGDataLoader.splits(train, val, mode='rel',
                                               batch_size=conf.batch_size,
                                               num_workers=conf.num_workers,
                                               num_gpus=conf.num_gpus)
# Build the relation detector from the training split's label vocabularies
# (argument list continues past this fragment).
detector = RelModel(classes=train.ind_to_classes, rel_classes=train.ind_to_predicates,
                    num_gpus=conf.num_gpus, mode=conf.mode, require_overlap_det=True,
                    use_resnet=conf.use_resnet, order=conf.order,
                    nl_edge=conf.nl_edge, nl_obj=conf.nl_obj, hidden_dim=conf.hidden_dim,
                    use_proposals=conf.use_proposals,
                    pass_in_obj_feats_to_decoder=conf.pass_in_obj_feats_to_decoder,
                    pass_in_obj_feats_to_edge=conf.pass_in_obj_feats_to_edge,
def __init__(self, classes, rel_classes, embed_dim, obj_dim, inputs_dim,
             hidden_dim, pooling_dim, recurrent_dropout_probability=0.2,
             use_highway=True, use_input_projection_bias=True, use_vision=True,
             use_bias=True, use_tanh=True, limit_vision=True,
             sl_pretrain=False, num_iter=-1):
    """
    Initializes the RNN.

    Builds the object/relation embeddings, the (optionally highway) LSTM
    projection layers, the relation-prediction head, and a frequency prior
    matrix computed from Visual Genome co-occurrence counts.

    :param classes: list of object class names
    :param rel_classes: list of relation (predicate) names
    :param embed_dim: Dimension of the embeddings
    :param obj_dim: dimension of object feature vectors
    :param inputs_dim: dimension of the RNN inputs
    :param hidden_dim: Hidden dim of the decoder
    :param pooling_dim: dimension of the pooled visual features
    :param recurrent_dropout_probability: dropout applied to recurrent state
    :param use_highway: if True, allocate extra projections for highway gates
    :param use_input_projection_bias: whether the input projection has a bias
    :param sl_pretrain: presumably a supervised-pretraining flag — TODO confirm
    :param num_iter: iteration count used elsewhere; -1 appears to disable it
    """
    super(DecoderRNN, self).__init__()
    self.rel_embedding_dim = 100
    self.classes = classes
    self.rel_classes = rel_classes

    # Object-class embeddings initialized from word vectors; a 'start' token
    # is prepended to the class list when fetching vectors.
    embed_vecs = obj_edge_vectors(['start'] + self.classes, wv_dim=100)
    self.obj_embed = nn.Embedding(len(self.classes), embed_dim)
    self.obj_embed.weight.data = embed_vecs

    # Relation (predicate) embeddings, also initialized from word vectors.
    embed_rels = obj_edge_vectors(self.rel_classes, wv_dim=self.rel_embedding_dim)
    self.rel_embed = nn.Embedding(len(self.rel_classes), self.rel_embedding_dim)
    self.rel_embed.weight.data = embed_rels

    self.embed_dim = embed_dim
    self.obj_dim = obj_dim
    self.hidden_size = hidden_dim
    self.inputs_dim = inputs_dim
    self.pooling_dim = pooling_dim
    self.nms_thresh = 0.3  # IoU threshold used for NMS during decoding

    self.use_vision = use_vision
    self.use_bias = use_bias
    self.use_tanh = use_tanh
    self.limit_vision = limit_vision
    self.sl_pretrain = sl_pretrain
    self.num_iter = num_iter

    self.recurrent_dropout_probability = recurrent_dropout_probability
    self.use_highway = use_highway
    # We do the projections for all the gates all at once, so if we are
    # using highway layers, we need some extra projections, which is
    # why the sizes of the Linear layers change here depending on this flag.
    # NOTE(review): self.input_size is defined elsewhere in the class
    # (not visible in this fragment).
    if use_highway:
        self.input_linearity = torch.nn.Linear(self.input_size, 6 * self.hidden_size,
                                               bias=use_input_projection_bias)
        self.state_linearity = torch.nn.Linear(self.hidden_size, 5 * self.hidden_size,
                                               bias=True)
    else:
        self.input_linearity = torch.nn.Linear(self.input_size, 4 * self.hidden_size,
                                               bias=use_input_projection_bias)
        self.state_linearity = torch.nn.Linear(self.hidden_size, 4 * self.hidden_size,
                                               bias=True)

    # self.obj_in_lin = torch.nn.Linear(self.rel_embedding_dim, self.rel_embedding_dim, bias=True)

    # Output projection over object classes.
    self.out = nn.Linear(self.hidden_size, len(self.classes))
    self.reset_parameters()

    # For relation predication
    embed_vecs2 = obj_edge_vectors(self.classes, wv_dim=embed_dim)
    self.obj_embed2 = nn.Embedding(self.num_classes, embed_dim)
    self.obj_embed2.weight.data = embed_vecs2.clone()

    # self.post_lstm = nn.Linear(self.hidden_dim, self.pooling_dim * 2)
    # Projects concatenated object features + two embeddings + 128-d position
    # features into head/tail halves of the pooled dimension.
    self.post_lstm = nn.Linear(self.obj_dim + 2 * self.embed_dim + 128, self.pooling_dim * 2)

    # Initialize to sqrt(1/2n) so that the outputs all have mean 0 and variance 1.
    # (Half contribution comes from LSTM, half from embedding.
    # In practice the pre-lstm stuff tends to have stdev 0.1 so I multiplied this by 10.
    self.post_lstm.weight.data.normal_(0, 10.0 * math.sqrt(1.0 / self.hidden_size))  ######## there may need more consideration
    self.post_lstm.bias.data.zero_()

    # Relation classifier over pooled features.
    # NOTE(review): torch.nn.init.xavier_normal is deprecated in modern
    # PyTorch (use xavier_normal_); in old PyTorch it initializes in place
    # and returns its argument, so the re-assignment is a no-op — confirm
    # against the project's pinned torch version before changing.
    self.rel_compress = nn.Linear(self.pooling_dim, self.num_rels, bias=True)
    self.rel_compress.weight = torch.nn.init.xavier_normal(self.rel_compress.weight, gain=1.0)
    if self.use_bias:
        self.freq_bias = FrequencyBias()

    # simple relation model
    # Build a predicate-frequency prior from VG training co-occurrence counts.
    from dataloaders.visual_genome import VG
    from lib.get_dataset_counts import get_counts, box_filter
    fg_matrix, bg_matrix = get_counts(train_data=VG.splits(num_val_im=5000,
                                                           filter_non_overlap=True,
                                                           filter_duplicate_rels=True,
                                                           use_proposals=False)[0],
                                      must_overlap=True)
    prob_matrix = fg_matrix.astype(np.float32)
    prob_matrix[:, :, 0] = bg_matrix

    # TRYING SOMETHING NEW.
    # Add-one smoothing on the background channel, then normalize over the
    # predicate axis so each (subject, object) slice sums to 1.
    prob_matrix[:, :, 0] += 1
    prob_matrix /= np.sum(prob_matrix, 2)[:, :, None]
    # prob_matrix /= float(fg_matrix.max())

    prob_matrix[:, :, 0] = 0  # Zero out BG
    self.prob_matrix = prob_matrix