def __init__(self, num_block, v_size, q_size, output_size, num_inter_head, num_intra_head, drop=0.0):
    super(SingleBlock, self).__init__()
    self.v_size = v_size
    self.q_size = q_size
    self.output_size = output_size
    self.num_inter_head = num_inter_head
    self.num_intra_head = num_intra_head
    self.num_block = num_block

    # Project visual and question features into the shared output space.
    self.v_lin = FCNet(v_size, output_size, drop=drop)
    self.q_lin = FCNet(q_size, output_size, drop=drop)

    # A single set of update modules (inter-modality flow in both directions plus a
    # dynamic intra-modality update), presumably reused across the num_block steps.
    self.v2q_interBlock = OneSideInterModalityUpdate(output_size, output_size, output_size, num_inter_head, drop)
    self.q2v_interBlock = OneSideInterModalityUpdate(output_size, output_size, output_size, num_inter_head, drop)
    self.intraBlock = DyIntraModalityUpdate(output_size, output_size, output_size, num_intra_head, drop)
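# NOTE: FCNet is not defined in this section, although every constructor here
# relies on it. Below is a minimal sketch of a plausible implementation (a
# weight-normalized nn.Linear with optional activation and dropout). The exact
# signature is an assumption; some calls in this section pass activate='relu'
# while others pass relu=False, which suggests more than one variant exists in
# the original repository.
import torch.nn as nn
from torch.nn.utils import weight_norm


class FCNet(nn.Module):
    def __init__(self, in_size, out_size, activate=None, drop=0.0):
        super(FCNet, self).__init__()
        self.lin = weight_norm(nn.Linear(in_size, out_size), dim=None)
        self.drop = nn.Dropout(drop)
        self.activate = nn.ReLU() if activate == 'relu' else None

    def forward(self, x):
        x = self.lin(self.drop(x))
        if self.activate is not None:
            x = self.activate(x)
        return x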
def __init__(self, in_features, mid_features, out_features, drop=0.0):
    super(Classifier, self).__init__()
    self.lin1 = FCNet(in_features, mid_features, activate='relu', drop=drop / 2.5)
    self.lin2 = FCNet(mid_features, out_features, drop=drop)
def __init__(self, num_block, v_size, q_size, output_size, num_inter_head, num_intra_head, drop=0.0):
    super(MultiBlock, self).__init__()
    self.v_size = v_size
    self.q_size = q_size
    self.output_size = output_size
    self.num_inter_head = num_inter_head
    self.num_intra_head = num_intra_head
    self.num_block = num_block

    self.v_lin = FCNet(v_size, output_size, drop=drop)
    self.q_lin = FCNet(q_size, output_size, drop=drop)

    # Unlike SingleBlock, every one of the num_block steps gets its own
    # (v->q, q->v, intra) triple of update modules.
    blocks = []
    for i in range(num_block):
        blocks.append(OneSideInterModalityUpdate(output_size, output_size, output_size, num_inter_head, drop))
        blocks.append(OneSideInterModalityUpdate(output_size, output_size, output_size, num_inter_head, drop))
        blocks.append(DyIntraModalityUpdate(output_size, output_size, output_size, num_intra_head, drop))
    self.multi_blocks = nn.ModuleList(blocks)
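# Sketch only: the append order above (v->q, q->v, intra) implies that the
# forward pass consumes self.multi_blocks in consecutive triples, one triple per
# refinement step. The indexing pattern is illustrated below with nn.Identity
# placeholders so it runs on its own; the real forward method is not shown here.
import torch.nn as nn

num_block = 2
multi_blocks = nn.ModuleList(nn.Identity() for _ in range(num_block * 3))
for i in range(num_block):
    v2q_block = multi_blocks[i * 3 + 0]    # updates question features from visual ones
    q2v_block = multi_blocks[i * 3 + 1]    # updates visual features from question ones
    intra_block = multi_blocks[i * 3 + 2]  # dynamic intra-modality refinement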
def __init__(self, v_features, q_features, mid_features, num_obj, count, drop=0.0):
    super(ApplySingleAttention, self).__init__()
    self.count = count
    self.lin_v = FCNet(v_features, mid_features, drop=drop)  # let self.lin take care of bias
    self.lin_q = FCNet(q_features, mid_features, drop=drop)
    self.lin_atten = FCNet(mid_features, mid_features, relu=False, drop=drop)
    self.lin_count = FCNet(num_obj + 1, mid_features, drop=0)
def __init__(self, src_size, tgt_size, output_size, num_head, drop=0.0):
    super(OneSideInterModalityUpdate, self).__init__()
    self.src_size = src_size
    self.tgt_size = tgt_size
    self.output_size = output_size
    self.num_head = num_head

    # The source projection is twice as wide so it can later be split into two
    # attention components; the attended result is concatenated with the
    # original target features before tgt_output.
    self.src_lin = FCNet(src_size, output_size * 2, drop=drop)
    self.tgt_lin = FCNet(tgt_size, output_size, drop=drop)
    self.tgt_output = FCNet(output_size + tgt_size, output_size, drop=drop)
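# Sketch only: the constructor above does not show the forward pass, but the
# projection widths suggest the usual multi-head pattern: the doubled source
# projection is split into keys and values, the target projection supplies the
# queries, and scaled dot-product attention pulls source information into the
# target (which is then concatenated with the original target features and fed
# through tgt_output). The function below illustrates that pattern on plain
# tensors; it is not the repository's implementation.
import torch


def one_side_attention(src_proj, tgt_proj, num_head):
    """src_proj: (batch, num_src, 2 * d) from src_lin; tgt_proj: (batch, num_tgt, d) from tgt_lin."""
    d = tgt_proj.size(-1)
    src_key, src_val = torch.split(src_proj, d, dim=2)
    head_dim = d // num_head
    # Split each projection into num_head chunks along the feature axis.
    keys = torch.stack(torch.split(src_key, head_dim, dim=2), dim=1)      # (batch, head, num_src, head_dim)
    vals = torch.stack(torch.split(src_val, head_dim, dim=2), dim=1)
    queries = torch.stack(torch.split(tgt_proj, head_dim, dim=2), dim=1)  # (batch, head, num_tgt, head_dim)
    scores = queries @ keys.transpose(-2, -1) / head_dim ** 0.5           # (batch, head, num_tgt, num_src)
    attended = torch.softmax(scores, dim=-1) @ vals                       # (batch, head, num_tgt, head_dim)
    # Merge the heads back into a single feature axis.
    return attended.transpose(1, 2).reshape(tgt_proj.size(0), tgt_proj.size(1), d)


# Example shapes: 36 visual objects flowing into 14 question tokens.
out = one_side_attention(torch.randn(2, 36, 1024), torch.randn(2, 14, 512), num_head=8)  # (2, 14, 512)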
def __init__(self, v_features, q_features, mid_features, glimpses, drop=0.0):
    super(Attention, self).__init__()
    self.lin_v = FCNet(v_features, mid_features, activate='relu')  # let self.lin take care of bias
    self.lin_q = FCNet(q_features, mid_features, activate='relu')
    self.lin = FCNet(mid_features, glimpses, drop=drop)
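# Sketch only: with the projections above, a typical attention head broadcasts
# the projected question vector over the object axis, fuses it with the
# per-object visual features (elementwise product here; the real code may use
# addition or concatenation instead), and maps the result to one logit per
# (object, glimpse), normalized over the objects. Self-contained illustration:
import torch
import torch.nn as nn


def glimpse_attention(v_proj, q_proj, lin):
    """v_proj: (batch, num_obj, mid); q_proj: (batch, mid); lin maps mid -> glimpses."""
    fused = v_proj * q_proj.unsqueeze(1)   # broadcast the question over the objects
    logits = lin(fused)                    # (batch, num_obj, glimpses)
    return torch.softmax(logits, dim=1)    # attention weights over the objects


weights = glimpse_attention(torch.randn(2, 36, 512), torch.randn(2, 512), nn.Linear(512, 2))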
def __init__(self, v_features, q_features, mid_features, glimpses, drop=0.0):
    super(BiAttention, self).__init__()
    self.hidden_aug = 1
    self.glimpses = glimpses
    self.lin_v = FCNet(v_features, int(mid_features * self.hidden_aug), drop=drop / 2.5)  # let self.lin take care of bias
    self.lin_q = FCNet(q_features, int(mid_features * self.hidden_aug), drop=drop / 2.5)

    # Per-glimpse bilinear weight and bias, broadcast over every (object, token) pair.
    self.h_weight = nn.Parameter(torch.Tensor(1, glimpses, 1, int(mid_features * self.hidden_aug)).normal_())
    self.h_bias = nn.Parameter(torch.Tensor(1, glimpses, 1, 1).normal_())
    self.drop = nn.Dropout(drop)
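# Sketch only: the shapes of h_weight (1, glimpses, 1, dim) and h_bias
# (1, glimpses, 1, 1) imply a low-rank bilinear score between every object and
# every question token, computed per glimpse by broadcasting. The snippet below
# illustrates that computation on random tensors; it is not the repository's
# forward method.
import torch

batch, glimpses, num_obj, num_tok, dim = 2, 2, 36, 14, 512
v_proj = torch.randn(batch, num_obj, dim)   # stands in for the output of lin_v
q_proj = torch.randn(batch, num_tok, dim)   # stands in for the output of lin_q
h_weight = torch.randn(1, glimpses, 1, dim)
h_bias = torch.randn(1, glimpses, 1, 1)

weighted_v = v_proj.unsqueeze(1) * h_weight                          # (batch, glimpses, num_obj, dim)
logits = weighted_v @ q_proj.unsqueeze(1).transpose(2, 3) + h_bias   # (batch, glimpses, num_obj, num_tok)
atten = torch.softmax(logits.view(batch, glimpses, -1), dim=2).view_as(logits)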
def __init__(self, v_size, q_size, output_size, num_head, drop=0.0):
    super(InterModalityUpdate, self).__init__()
    self.v_size = v_size
    self.q_size = q_size
    self.output_size = output_size
    self.num_head = num_head

    # Each modality is projected to three times the output size (query, key and
    # value); the attended result is concatenated with the original features
    # before the output projection.
    self.v_lin = FCNet(v_size, output_size * 3, drop=drop)
    self.q_lin = FCNet(q_size, output_size * 3, drop=drop)
    self.v_output = FCNet(output_size + v_size, output_size, drop=drop)
    self.q_output = FCNet(output_size + q_size, output_size, drop=drop)
def __init__(self, v_features, q_features, mid_features, dropout=0.0, sparse_graph=True):
    super(GraphLearner, self).__init__()
    self.sparse_graph = sparse_graph
    self.lin1 = FCNet(v_features + q_features, mid_features, activate='relu')
    self.lin2 = FCNet(mid_features, mid_features, activate='relu')
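# Sketch only: the constructor suggests that each object's visual feature is
# concatenated with a question representation, passed through lin1 and lin2,
# and the resulting node embeddings are compared pairwise to form an adjacency
# matrix, with sparse_graph keeping only the strongest edges per node. The
# exact procedure is an assumption; the snippet below only shows the generic
# learned-adjacency pattern.
import torch

batch, num_obj, dim = 2, 36, 512
nodes = torch.randn(batch, num_obj, dim)   # joint (visual + question) node embeddings after lin2
adjacency = torch.softmax(nodes @ nodes.transpose(1, 2), dim=-1)   # (batch, num_obj, num_obj)

# Optional sparsification: zero out everything except the k strongest edges per node.
k = 8
topk_vals, topk_idx = adjacency.topk(k, dim=-1)
sparse_adj = torch.zeros_like(adjacency).scatter_(-1, topk_idx, topk_vals)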
def __init__(self, v_size, q_size, output_size, num_head, drop=0.0):
    super(DyIntraModalityUpdate, self).__init__()
    self.v_size = v_size
    self.q_size = q_size
    self.output_size = output_size
    self.num_head = num_head

    # Cross-modal gates: a summary of one modality conditions the other's update.
    self.v4q_gate_lin = FCNet(v_size, output_size, drop=drop)
    self.q4v_gate_lin = FCNet(q_size, output_size, drop=drop)

    # Triple-width projections for intra-modality self-attention (query, key, value).
    self.v_lin = FCNet(v_size, output_size * 3, drop=drop)
    self.q_lin = FCNet(q_size, output_size * 3, drop=drop)
    self.v_output = FCNet(output_size, output_size, drop=drop)
    self.q_output = FCNet(output_size, output_size, drop=drop)

    self.relu = nn.ReLU()
    self.tanh = nn.Tanh()
    self.sigmoid = nn.Sigmoid()
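# Sketch only: "dynamic" here plausibly means that a pooled summary of one
# modality is turned into a sigmoid gate (via v4q_gate_lin / q4v_gate_lin) that
# rescales the other modality's inputs before intra-modality self-attention.
# The (1 + gate) form and the mean pooling below are assumptions.
import torch
import torch.nn as nn

batch, num_obj, num_tok, dim = 2, 36, 14, 512
v, q = torch.randn(batch, num_obj, dim), torch.randn(batch, num_tok, dim)
v4q_gate_lin, q4v_gate_lin = nn.Linear(dim, dim), nn.Linear(dim, dim)  # stand-ins for the FCNets

v4q_gate = torch.sigmoid(v4q_gate_lin(v.mean(dim=1))).unsqueeze(1)  # visual summary gates the question side
q4v_gate = torch.sigmoid(q4v_gate_lin(q.mean(dim=1))).unsqueeze(1)  # question summary gates the visual side

gated_v = (1 + q4v_gate) * v   # question-conditioned visual inputs
gated_q = (1 + v4q_gate) * q   # vision-conditioned question inputs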
def __init__(self, in_features, mid_features, out_features, drop=0.0):
    super(Classifier, self).__init__()
    # This Classifier variant takes a (visual, question) feature pair, so each
    # input gets its own projection before fusion.
    self.lin11 = FCNet(in_features[0], mid_features)
    self.lin12 = FCNet(in_features[1], mid_features)
    self.lin2 = FCNet(mid_features, mid_features)
    self.lin3 = FCNet(mid_features, out_features, relu=False, drop=drop)
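# Sketch only: given the paired inputs above, a typical forward pass fuses the
# two projections (elementwise product is assumed here) before the remaining
# layers. Plain nn.Linear layers stand in for the FCNets to keep the
# illustration self-contained; all sizes are examples.
import torch
import torch.nn as nn

lin11, lin12 = nn.Linear(2048, 1024), nn.Linear(1024, 1024)   # per-modality projections
lin2, lin3 = nn.Linear(1024, 1024), nn.Linear(1024, 3129)     # 3129 is a common VQA v2 answer vocabulary size

v, q = torch.randn(2, 2048), torch.randn(2, 1024)
logits = lin3(lin2(lin11(v) * lin12(q)))                      # (2, 3129) answer scores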