def __init__(self, name, channels_in=32, map_world_size=32, *inputs):
    super(GoalAuxiliary2D, self).__init__(name, *inputs)
    self.gather_2d = Gather2D()
    self.channels_in = channels_in
    self.map_world_size = map_world_size
    self.goal_linear = nn.Linear(channels_in, 2)
    enable_weight_saving(self.goal_linear, "aux_goal_linear_" + name)
    self.loss = nn.CrossEntropyLoss()
    self.accuracy_meter = MovingAverageMeter(10)
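
# --- Hedged usage sketch (not from the repo): how a 2D goal auxiliary like the
# one above is typically wired up — gather feature vectors at map coordinates,
# classify each as goal / not-goal with the linear head, and score with
# cross-entropy. Gather2D and the exact forward signature are repo internals;
# plain tensor indexing stands in for the gather step here.
import torch
import torch.nn as nn

def goal_aux_loss_sketch(feature_map, coords, labels, goal_linear, loss_fn):
    """feature_map: [B, C, H, W]; coords: [N, 3] rows of (batch, x, y);
    labels: [N] with 1 at the goal location, 0 elsewhere."""
    b, x, y = coords[:, 0], coords[:, 1], coords[:, 2]
    gathered = feature_map[b, :, x, y]      # [N, C] — stand-in for Gather2D
    logits = goal_linear(gathered)          # [N, 2] goal / not-goal scores
    return loss_fn(logits, labels)

# Example with the same shapes the constructor above assumes:
fmap = torch.randn(2, 32, 32, 32)
coords = torch.tensor([[0, 5, 7], [1, 12, 3]])
labels = torch.tensor([1, 0])
loss = goal_aux_loss_sketch(fmap, coords, labels, nn.Linear(32, 2), nn.CrossEntropyLoss())
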
def __init__(self, name, feature_vec_len=32, num_classes=64, dropout=0, *inputs):
    super(ClassAuxiliary2D, self).__init__(name, *inputs)
    self.gather_2d = Gather2D()
    self.channels_in = feature_vec_len
    self.dropout = nn.Dropout(dropout)
    self.num_classes = num_classes
    self.cls_linear = nn.Linear(feature_vec_len, num_classes)
    enable_weight_saving(self.cls_linear, "aux_class_linear_2d_" + name)
    # The grounding-map variant balances its two classes with equal weights.
    # Pass the weight at construction rather than assigning self.loss.weight
    # afterwards, which is the supported way to build a weighted loss.
    if self.name == "aux_grounding_map":
        self.loss = nn.CrossEntropyLoss(weight=torch.tensor([0.5, 0.5]))
    else:
        self.loss = nn.CrossEntropyLoss()
    self.meter_accuracy = MovingAverageMeter(10)
def __init__(self, name, world_size_px=32, feature_vec_len=32, num_classes=64, dropout=0, *inputs):
    super(ClassAuxiliary2D, self).__init__(name, *inputs)
    self.gather_2d = Gather2D()
    self.channels_in = feature_vec_len
    self.dropout = nn.Dropout(dropout)
    self.num_classes = num_classes
    self.world_size_px = world_size_px
    self.cls_linear = nn.Linear(feature_vec_len, num_classes)
    enable_weight_saving(self.cls_linear, "aux_class_linear_2d_" + name)
    self.loss = nn.CrossEntropyLoss()
    self.meter_accuracy = MovingAverageMeter(10)
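
# --- Hedged sketch (not from the repo): the per-location classification both
# ClassAuxiliary2D constructors above set up — gather feature vectors at object
# coordinates, apply dropout, classify with the linear head. Class weights,
# when used, go to CrossEntropyLoss at construction as in the grounding-map
# branch above. Shapes and names here are illustrative.
import torch
import torch.nn as nn

feature_vec_len, num_classes = 32, 64
cls_linear = nn.Linear(feature_vec_len, num_classes)
dropout = nn.Dropout(0.5)
loss_fn = nn.CrossEntropyLoss()

features = torch.randn(10, feature_vec_len)    # e.g. 10 gathered map locations
labels = torch.randint(0, num_classes, (10,))  # one class label per location
loss = loss_fn(cls_linear(dropout(features)), labels)
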
def __init__(self, run_name, ignore_lang=False, class_loss=True, ground_loss=True):
    super(ModelTopDownPathGoalPredictor, self).__init__()
    self.run_name = run_name
    self.model_name = "top_down_path_pred_pretrain"
    self.writer = SummaryWriter(log_dir="runs/" + run_name)

    self.ignore_lang = ignore_lang
    self.class_loss = class_loss
    self.ground_loss = ground_loss

    # The feature net extracts the 2D feature map from the input image.
    # The label_pool down-sizes the ground-truth labels, which are input at the
    # same size as the input image, so that the predicted labels match the
    # feature-map resolution.
    self.feature_net = ResNet13Light(32, down_pad=True)
    self.label_pool = nn.MaxPool2d(8)

    if self.ground_loss:
        self.lang_filter = MapLangSemanticFilter(sentence_embedding_size, 32, 3)
        self.aux_ground_linear = nn.Linear(3, 2)
        enable_weight_saving(self.lang_filter, "ground_filter")
        enable_weight_saving(self.aux_ground_linear, "ground_aux_linear")

    if RESNET:
        self.unet = ResNetConditional(sentence_embedding_size, 35, 2)
    else:
        # The grounding filter adds 3 channels to the 32-channel feature map
        unet_c_in = 35 if self.ground_loss else 32
        unet_hc1 = 48
        unet_hb1 = 24
        self.unet = Unet5ContextualBneck(
            unet_c_in, 2, sentence_embedding_size,
            hc1=unet_hc1, hb1=unet_hb1, hc2=128,
            split_embedding=splitemb)

    if attention:
        self.sentence_embedding = SentenceEmbeddingSelfAttention(
            word_embedding_size, lstm_size, sentence_embedding_layers,
            attention_heads=attention_heads)
    else:
        self.sentence_embedding = SentenceEmbeddingSimple(
            word_embedding_size, sentence_embedding_size, sentence_embedding_layers)

    self.gather2d = Gather2D()

    if self.class_loss:
        self.aux_class_linear = nn.Linear(32, 64)
        enable_weight_saving(self.aux_class_linear, "class_aux_linear")

    print("Sentence Embedding #Params: ", get_n_params(self.sentence_embedding))
    print("U-Net #Params: ", get_n_params(self.unet))
    print("Class auxiliary: ", self.class_loss)
    print("Ground auxiliary: ", self.ground_loss)

    # Enable saving of pre-trained weights
    enable_weight_saving(self.feature_net, "feature_resnet_light")
    enable_weight_saving(self.unet, "unet")
    enable_weight_saving(self.sentence_embedding, "sentence_embedding")

    if NLL:
        # NLLLoss handles 2D targets since PyTorch 0.4; NLLLoss2d is deprecated
        self.mask_loss = nn.NLLLoss()
    elif BCE:
        self.mask_loss = nn.BCEWithLogitsLoss()
    elif CE:
        self.spatialsoftmax = SpatialSoftmax2d()
        self.mask_loss = CrossEntropy2d()
    else:
        self.mask_loss = nn.MSELoss()

    # reduction="mean" replaces the deprecated reduce=True, size_average=True
    self.aux_loss = nn.CrossEntropyLoss(reduction="mean")

    self.epoch_numbers = {"train": 0, "eval": 0}
    self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

    self.dropout = nn.Dropout(0.5)
    self.dropout2d = nn.Dropout2d(0.5)
    self.dropout3d = nn.Dropout3d(0.5)

    self.viz_images = []
    self.instructions = []
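
# --- Hedged sketch (not from the repo): one common formulation of what the CE
# branch above computes. SpatialSoftmax2d and CrossEntropy2d are repo classes;
# a typical spatial cross-entropy takes a softmax over all H*W locations of the
# predicted mask and scores it against a ground-truth distribution on the grid.
import torch
import torch.nn.functional as F

def spatial_cross_entropy_sketch(pred_logits, target_dist):
    """pred_logits: [B, 1, H, W] raw scores; target_dist: [B, 1, H, W], sums to 1 per image."""
    b = pred_logits.size(0)
    log_probs = F.log_softmax(pred_logits.view(b, -1), dim=1)  # softmax over H*W
    return -(target_dist.view(b, -1) * log_probs).sum(dim=1).mean()

pred = torch.randn(2, 1, 8, 8)
target = torch.softmax(torch.randn(2, 1, 8, 8).view(2, -1), dim=1).view(2, 1, 8, 8)
loss = spatial_cross_entropy_sketch(pred, target)
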
def __init__(self, name, world_size_px=32, kind="l1", *inputs):
    super(FeatureRegularizationAuxiliary2D, self).__init__(name, *inputs)
    self.gather_2d = Gather2D()
    self.world_size_px = world_size_px
    self.kind = kind
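
# --- Hedged sketch (not from the repo): with kind="l1", a feature-map
# regularizer of this sort usually penalizes the mean absolute activation,
# pushing unused map channels toward zero. The repo's exact loss may differ.
import torch

def l1_feature_reg_sketch(feature_map):
    # feature_map: [B, C, H, W]; returns a scalar L1 penalty
    return feature_map.abs().mean()

loss = l1_feature_reg_sketch(torch.randn(2, 32, 32, 32))
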
def __init__(self, feature_vec_len, num_outputs=63):
    super(AuxLandmarkClassifier, self).__init__()
    self.aux_class_linear = nn.Linear(feature_vec_len, num_outputs)
    self.gather_2d = Gather2D()
    # reduction="none" replaces the deprecated reduce=False, size_average=False
    self.aux_loss = nn.CrossEntropyLoss(reduction="none")
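
# --- Hedged sketch (not from the repo): reduction="none" leaves one loss term
# per gathered landmark, which the caller can mask or re-weight before
# averaging. The mask below is illustrative.
import torch
import torch.nn as nn

aux_loss = nn.CrossEntropyLoss(reduction="none")
logits = torch.randn(5, 63)                  # 5 gathered locations, 63 landmark classes
labels = torch.randint(0, 63, (5,))
per_item = aux_loss(logits, labels)          # shape [5], one loss per location
valid = torch.tensor([1., 1., 0., 1., 1.])   # e.g. mask out invalid gathers
loss = (per_item * valid).sum() / valid.sum()
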