Example #1
def load_named_sparse(input_filename, key):
    from ipdb import set_trace as bp
    bp()
    npy = np.load(input_filename)[key]
    coo_matrix = sparse.coo_matrix((npy['data'], (npy['row'], npy['col'])),
                                   shape=npy['shape'])
    return coo_matrix.tocsc()
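
The core of this helper is the COO-to-CSC conversion; a self-contained sketch of just that step with toy triplets (no file involved, so the unknown on-disk layout is not assumed):

import numpy as np
from scipy import sparse

# Toy triplet data standing in for npy['data'], npy['row'], npy['col'], npy['shape'].
data, row, col, shape = [1.0, 2.0, 3.0], [0, 1, 2], [2, 0, 1], (3, 3)
X = sparse.coo_matrix((data, (row, col)), shape=shape).tocsc()
print(X.getformat(), X.nnz)   # csc 3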
Example #2
def Apply():
    global mod_vps, apply_response
    r = request
    if r.method == 'POST':
        jsondata = r.get_json()
        if type(jsondata) is str:
            rcv_data = json.loads(jsondata)
        else:
            rcv_data = jsondata
        # type(rcv_data) must be dict

        if type(rcv_data) is not dict:
            print("Invalid post data type from client")
            apply_response = {"vps_IDandConf": [[0], [0]]}
            response_pickled = jsonpickle.encode(apply_response)
            return Response(response=response_pickled,
                            status=200,
                            mimetype="application/json")

        K = rcv_data['K']
        gps_lat = rcv_data['gps_lat']
        gps_lon = rcv_data['gps_lon']
        gps_accuracy = rcv_data['gps_accuracy']
        image_data = rcv_data['image_data']
        image_size = rcv_data['image_size']
        query = deserialize_image(image_data, image_size)
        timestamp = rcv_data['timestamp']
        streetview_server_ipaddr = rcv_data['streetview_server_ipaddr']
        # print(K, gps_lat, gps_lon, gps_accuracy, streetview_server_ipaddr)
        try:
            # vps_IDandConf = dummy_apply(image, K, gps_lat, gps_lon, gps_accuracy, 0)
            vps_IDandConf = mod_vps.apply(query,
                                          K,
                                          gps_lat,
                                          gps_lon,
                                          gps_accuracy,
                                          timestamp,
                                          ipaddr=streetview_server_ipaddr)
        except Exception:
            bp()  # drop into the debugger if mod_vps.apply() raises
        apply_response = {
            "vps_IDandConf": vps_IDandConf,
            'timestamp': timestamp
        }
        response_pickled = jsonpickle.encode(apply_response)
        return Response(response=response_pickled,
                        status=200,
                        mimetype="application/json")
    if r.method == 'GET':
        if 'apply_response' not in globals():
            apply_response = {
                "vps_IDandConf": np.zeros((2, 3)).tolist(),
                'timestamp': 0
            }
        response_pickled = jsonpickle.encode(apply_response)
        return Response(response=response_pickled,
                        status=200,
                        mimetype="application/json")
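
A hedged sketch of a matching client call; the URL is a placeholder, and image_data / image_size must be in whatever serialized form deserialize_image() on the server expects (not shown here):

# Hypothetical client; all values below are placeholders.
import json
import requests

payload = {
    "K": 3,
    "gps_lat": 36.38, "gps_lon": 127.37, "gps_accuracy": 1.0,
    "image_data": "<serialized image bytes>", "image_size": [640, 480],
    "timestamp": 0.0,
    "streetview_server_ipaddr": "localhost",
}
resp = requests.post("http://localhost:7729/Apply", json=payload)
print(json.loads(resp.text)["vps_IDandConf"])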
Example #3
def balancedMiniDataset(trainset, size, limit, fullyBalanced=True):
    counter = np.zeros(20)
    #counter += size
    
    #if fullyBalanced:
    #    dropClasses = [50,39,80,74,43,37,31,84,48,89,40,27,45,12,26,29,30,66,68,69,71,83,0]
    #else:
    #    dropClasses = [45,12,26,29,30,66,68,69,71,83,0]
    #for ccc in dropClasses:
    #    counter[dropClasses] = size
    #for ccc in keepClasses:
    #    counter[keepClasses] = 0
    iterating = True
    step = 0
    subsetToInclude = []
    subsetToNotInclude = []
    #subsetToNotInclude += list(range(step))
    while iterating and step < limit:
        #bp()
        try:
            #bp()
            label = np.array(trainset[step][1])
            #bp()
            if np.all(counter + label <= size) and (not fullyBalanced or np.sum(label).item() == 1):
                counter += label
                print(counter, step)
                subsetToInclude.append(step)
            else:
                subsetToNotInclude.append(step)
            if np.min(counter) >= size:
                print("Completely Balanced Dataset")
                iterating = False
        except Exception:
            print(step)  # skip samples that fail to load
        if step%1000 == 0:
            print(step)
        step += 1

    #subsetToNotInclude += list(range(step, len(trainset)))
    #subsetToNotInclude = subsetToNotInclude[:10000]
    #while len(subsetToNotInclude) < 10000:
    #    try:
    #        label = np.array(trainset[step][1])
    #        subsetToNotInclude.append(step)
    #    except:
    #        print(step)
    #subsetToNotInclude += list(range(step, len(trainset)))
    out_dir = '/home/users/alimirz1/SemisupervisedAttention/saved_batches/coco_splits/'
    np.savetxt(out_dir + str(size) + '_per_top20class.csv',
               np.array(subsetToInclude), delimiter=',')
    np.savetxt(out_dir + str(size) + '_per_top20class_validation.csv',
               np.array(subsetToNotInclude), delimiter=',')
    bp()
    return torch.utils.data.Subset(trainset, subsetToInclude), torch.utils.data.Subset(trainset, subsetToNotInclude) 
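
A hypothetical call; it assumes trainset[i] yields (input, label) pairs with a 20-dimensional multi-hot label, which is what the counter arithmetic above relies on, and that the hard-coded output directory exists:

# Hypothetical usage; `trainset` is assumed to yield (input, 20-dim multi-hot label) pairs.
balanced_subset, heldout_subset = balancedMiniDataset(trainset, size=100, limit=50000)
print(len(balanced_subset), len(heldout_subset))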
Example #4
 def __init__(self):
     self.ipaddr = 'localhost'
     self.gps_lat = 0.0 #Latitude
     self.gps_lon = 0.0 #Longitude
     self.vps_lat = 0.0 # Latitude from VPS function
     self.vps_long = 0.0 # Longitude from VPS function
     self.angle = -1  # road direction (radian)
     self.vps_prob = -1   # reliability of the result. 0: fail ~ 1: success
     self.K = int(3) # K for Top-K for best matching
     if self.init_vps_IDandConf(self.K) < 0: #init_vps_IDandConf after setting self.K
         bp()
     self.ToTensor = transforms.ToTensor()
     self.verbose = False # True: print internal results
     self.StreetViewServerAvaiable = True
     self.callcounter_gSV = 0 # number of calls to getStreetView(), for debugging purposes
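
The constructor calls init_vps_IDandConf(self.K), which is not shown here. A plausible minimal version, inferred only from the shape and type checks in checking_return_value() (Example #7) and the [[0], [0]] fallback in Example #2, would look like this (an assumption, not the project's actual code):

# Assumed helper, inferred from how self.vps_IDandConf is checked elsewhere in these examples.
def init_vps_IDandConf(self, K):
    self.vps_IDandConf = [[0] * K, [0.0] * K]   # [Top-K image IDs, Top-K confidences]
    return 0                                    # a negative return signals failure to __init__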
Example #5
    def __init__(self, structFile, input_transform=None, onlyDB=False):
        super().__init__()

        self.input_transform = input_transform

        self.dbStruct = parse_dbStruct(structFile)
        self.images = [join(root_dir, dbIm) for dbIm in self.dbStruct.dbImage]
        bp()
        if not onlyDB:
            self.images += [
                join(queries_dir, qIm) for qIm in self.dbStruct.qImage
            ]

        self.whichSet = self.dbStruct.whichSet
        self.dataset = self.dbStruct.dataset

        self.positives = None
        self.distances = None
Example #6
    def _read(self, file_path):
        examples = pickle.load(open(file_path, "rb"))
        for ix, example in enumerate(examples):
            padded_batch_size = example["max_entity_per_doc"]

            mat = example["text"].todense()
            _, vocab_size = mat.shape
            all_idx = [i for i in range(example["text"].shape[0])]
            entities_idx = [
                entity["entity_text_ids"] for entity in example["entities"]
            ]
            all_entities_idx = list(itertools.chain(*entities_idx))
            context_idx = [i for i in all_idx if i not in all_entities_idx]

            if len(context_idx) == 0:
                continue

            if len(entities_idx) == 0:
                continue
            entities = np.stack([mat[elm].sum(0) for elm in entities_idx])
            # bp()
            try:
                context = np.stack(mat[context_idx])
            except Exception:
                bp()  # drop into the debugger if stacking the context rows fails
            # vec = np.zeros((padded_batch_size, vocab_size))
            # vec[:entities.shape[0], :] = entities
            vec = entities  # immediately overwritten below; only the context rows are used
            vec = context
            # vec = mat

            if self._use_doc_info:
                d = mat.sum(0).repeat(len(entities_idx), axis=0)
                # vec_d = np.zeros((padded_batch_size, vocab_size))
                # vec_d[d.shape[0], :] = d
                vec_d = d
                vec = np.concatenate([vec, vec_d], axis=1)
            instance = self.text_to_instance(vec)
            # bp()
            if instance is not None:
                yield instance
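
A minimal, hypothetical example of the pickled record layout this reader expects, inferred only from the fields _read() touches (a sparse document-term matrix under "text", entity row indices, and "max_entity_per_doc"); the real files come from the project's own preprocessing:

# Toy record matching the fields _read() accesses; purely illustrative.
import pickle
import numpy as np
from scipy import sparse

vocab_size = 50
mat = sparse.csr_matrix(np.random.randint(0, 3, size=(6, vocab_size)))
toy_example = {
    "text": mat,                                # rows = entity mentions + context spans
    "entities": [{"entity_text_ids": [0, 1]},   # row indices belonging to each entity
                 {"entity_text_ids": [3]}],
    "max_entity_per_doc": 8,
}
with open("toy_examples.pkl", "wb") as f:
    pickle.dump([toy_example], f)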
Example #7
 def checking_return_value(self):
     K = self.K
     vps_imgID = self.vps_IDandConf[0]
     vps_imgConf = self.vps_IDandConf[1]
     if (len(vps_imgID) != K) or (len(vps_imgConf) != K):
         dsmg("Error : K result")
         bp()
         return -1
     ErrCnt = K
     for i in vps_imgID:
         #if not isinstance(i, np.uint64):
         if not isinstance(i, int):
             ErrCnt = ErrCnt - 1
     if K != ErrCnt:
         bp()
         return -1
     ErrCnt = K
     for i in vps_imgConf:
         #if not isinstance(i, np.double):
         if not isinstance(i, float):
             ErrCnt = ErrCnt - 1
     if K != ErrCnt:
         bp()
         return -1
     return 0
Example #8
    def forward(self,  # pylint: disable=arguments-differ
                tokens: Union[Dict[str, torch.IntTensor], torch.IntTensor],
                epoch_num: List[int] = None):
        """
        Parameters
        ----------
        tokens: ``Union[Dict[str, torch.IntTensor], torch.IntTensor]``
            A batch of tokens. We expect tokens to be represented in one of two ways:
                1. As token IDs. This representation will be used with downstream models, where bag-of-word count embedding
                must be done on the fly. If token IDs are provided, we use the bag-of-word-counts embedder to embed these
                tokens during training.
                2. As pre-computed bag of words vectors. This representation will be used during pretraining, where we can
                precompute bag-of-word counts and train much faster.
        epoch_num: ``List[int]``
            Output of epoch tracker
        """

        # For easy transfer to the GPU.
        self.device = self.vae.get_beta().device  # pylint: disable=W0201

        output_dict = {}

        if not self.training:
            self._kld_weight = 1.0  # pylint: disable=W0201
        else:
            self.update_kld_weight(epoch_num)

        # if you supply input as token IDs, embed them into bag-of-word-counts with a token embedder
        if isinstance(tokens, dict):
            embedded_tokens = (self._bag_of_words_embedder(tokens['tokens']).to(device=self.device))
        else:
            embedded_tokens = tokens

        _, num_p, x_dim = embedded_tokens.shape
        if self._use_doc_info:
            # bp()
            embedded_doc_tokens, embedded_entity_tokens = embedded_tokens.split(x_dim // 2, dim=1)
            weights = torch.softmax(self.interpolation, dim=0)
            embedded_tokens = weights[0] * embedded_doc_tokens + weights[1] * embedded_entity_tokens
        else:
            # bp()
            assert x_dim == self.vocab.get_vocab_size(self.vocab_namespace)
        # Encode the text into a shared representation for both the VAE
        # and downstream classifiers to use.
        embedded_tokens = embedded_tokens.sum(1)
        encoder_output = self.vae.encode(embedded_tokens)

        # Perform variational inference.
        variational_output = self.vae(encoder_output)

        # Reconstructed bag-of-words from the VAE with background bias.
        reconstructed_bow = variational_output['reconstruction'] + self._background_freq

        # Apply batchnorm to the reconstructed bag of words.
        # Helps with word variety in topic space.

        reconstructed_bow = self.bow_bn(reconstructed_bow) if self._apply_batchnorm_on_recon else reconstructed_bow

        # Reconstruction log likelihood: log P(x | z) = log softmax(z beta + b)
        if self._use_doc_info:
            reconstruction_loss = self.bow_reconstruction_loss(reconstructed_bow, embedded_entity_tokens)
        else:
            # bp()
            reconstruction_loss = self.bow_reconstruction_loss(reconstructed_bow, embedded_tokens)

        # KL-divergence that is returned is the mean of the batch by default.
        negative_kl_divergence = variational_output['negative_kl_divergence']

        # Compute ELBO
        elbo = negative_kl_divergence * self._kld_weight + reconstruction_loss

        loss = -torch.mean(elbo)

        output_dict['loss'] = loss
        theta = variational_output['theta']

        # Keep track of internal states for use downstream
        activations: List[Tuple[str, torch.FloatTensor]] = []
        # intermediate_input = embedded_tokens
        # for layer_index, layer in enumerate(self.vae.encoder._linear_layers):  # pylint: disable=protected-access
        #    intermediate_input = layer(intermediate_input)
        #    activations.append((f"encoder_layer_{layer_index}", intermediate_input))

        activations.append(('theta', theta))

        output_dict['activations'] = activations

        # Update metrics
        nkld = -torch.mean(negative_kl_divergence)
        nll = -torch.mean(reconstruction_loss)
        if torch.isnan(nkld):
            bp()
        if torch.isnan(nll):
            bp()
        if torch.isnan(loss):
            bp()
        
        self.metrics['nkld'](nkld)
        self.metrics['nll'](nll)
        self.metrics['perp'](loss)

        # batch_num is tracked for kl weight annealing
        self.batch_num += 1

        self.compute_custom_metrics_once_per_epoch(epoch_num)

        self.metrics['npmi'] = self._cur_npmi

        return output_dict
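
The loss returned above is the negative mean ELBO with an annealed KL weight; a toy numeric check of just that arithmetic (all values made up):

import torch

negative_kl_divergence = torch.tensor([-0.8, -1.2])   # per-document -KL(q || p)
reconstruction_loss = torch.tensor([-35.0, -42.0])    # per-document log-likelihood
kld_weight = 0.5                                      # annealed KL weight
elbo = negative_kl_divergence * kld_weight + reconstruction_loss
loss = -torch.mean(elbo)
print(loss)   # tensor(39.0000)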
Example #9
    def __init__(self,
                 vocab: Vocabulary,
                 bow_embedder: TokenEmbedder,
                 vae: VAE,
                 apply_batchnorm_on_recon: bool = False,
                 batchnorm_weight_learnable: bool = False,
                 batchnorm_bias_learnable: bool = True,
                 kl_weight_annealing: str = "constant",
                 linear_scaling: float = 1000.0,
                 sigmoid_weight_1: float = 0.25,
                 sigmoid_weight_2: float = 15,
                 reference_counts: str = None,
                 reference_vocabulary: str = None,
                 use_background: bool = False,
                 background_data_path: str = None,
                 update_background_freq: bool = False,
                 track_topics: bool = True,
                 track_npmi: bool = True,
                 initializer: InitializerApplicator = InitializerApplicator(),
                 regularizer: Optional[RegularizerApplicator] = None) -> None:
        super().__init__(vocab, regularizer)

        self.metrics = {'nkld': Average(), 'nll': Average(), 'perp': Average()}

        self.vocab = vocab
        self.vae = vae
        self.track_topics = track_topics
        self.track_npmi = track_npmi
        self.vocab_namespace = "avitm"
        self._update_background_freq = update_background_freq

        avitm_vocab_size = self.vocab.get_vocab_size(self.vocab_namespace)
        self._background_freq = self.initialize_bg_from_file(
            file_=background_data_path) if use_background else 0
        self._ref_counts = reference_counts

        if reference_vocabulary is not None:
            # Compute data necessary to compute NPMI every epoch
            logger.info("Loading reference vocabulary.")
            self._ref_vocab = read_json(cached_path(reference_vocabulary))
            self._ref_vocab_index = dict(
                zip(self._ref_vocab, range(len(self._ref_vocab))))
            logger.info("Loading reference count matrix.")
            self._ref_count_mat = load_sparse(cached_path(self._ref_counts))
            logger.info("Computing word interaction matrix.")
            self._ref_doc_counts = (self._ref_count_mat > 0).astype(float)
            self._ref_interaction = self._ref_doc_counts.T.dot(
                self._ref_doc_counts)
            self._ref_doc_sum = np.array(
                self._ref_doc_counts.sum(0).tolist()[0])
            logger.info("Generating npmi matrices.")
            (self._npmi_numerator,
             self._npmi_denominator) = self.generate_npmi_vals(
                 self._ref_interaction, self._ref_doc_sum)
            self.n_docs = self._ref_count_mat.shape[0]

        self._bag_of_words_embedder = bow_embedder

        self._kl_weight_annealing = kl_weight_annealing

        self._linear_scaling = float(linear_scaling)
        self._sigmoid_weight_1 = float(sigmoid_weight_1)
        self._sigmoid_weight_2 = float(sigmoid_weight_2)
        if kl_weight_annealing == "linear":
            self._kld_weight = min(1.0, 1 / self._linear_scaling)
        elif kl_weight_annealing == "sigmoid":
            self._kld_weight = float(
                1 / (1 + np.exp(-self._sigmoid_weight_1 *
                                (1 - self._sigmoid_weight_2))))
        elif kl_weight_annealing == "constant":
            self._kld_weight = 1.0
        else:
            raise ConfigurationError(
                "anneal type {} not found".format(kl_weight_annealing))

        # setup batchnorm
        self._apply_batchnorm_on_recon = apply_batchnorm_on_recon
        if apply_batchnorm_on_recon:
            self.bow_bn = create_trainable_BatchNorm1d(
                avitm_vocab_size,
                weight_learnable=batchnorm_weight_learnable,
                bias_learnable=batchnorm_bias_learnable,
                eps=0.001,
                momentum=0.001,
                affine=True)

        # Maintain these states for periodically printing topics and updating KLD
        self._metric_epoch_tracker = 0
        self._kl_epoch_tracker = 0
        self._cur_epoch = 0
        self._cur_npmi = 0.0
        self.batch_num = 0

        initializer(self)
        bp()
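
For reference, the sigmoid KL-weight schedule configured above, pulled out as a standalone helper; treating the epoch index t as the quantity that update_kld_weight advances (in place of the constant 1 used at initialization) is an assumption, since that method is not shown:

# Standalone illustration of the sigmoid KL-weight schedule set up in this __init__.
import numpy as np

def sigmoid_kld_weight(t, weight_1=0.25, weight_2=15):
    return float(1 / (1 + np.exp(-weight_1 * (t - weight_2))))

print([round(sigmoid_kld_weight(t), 3) for t in (0, 5, 10, 15, 20, 25, 30)])
# the weight ramps smoothly from ~0.02 toward 1.0, centered around t = weight_2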
Example #10
    def forward(self,  # pylint: disable=arguments-differ
                tokens: Union[Dict[str, torch.IntTensor], torch.IntTensor],
                entities: Union[Dict[str, torch.IntTensor], torch.IntTensor],
                epoch_num: List[int] = None):
        """
        Parameters
        ----------
        tokens: ``Union[Dict[str, torch.IntTensor], torch.IntTensor]``
            A batch of tokens. We expect tokens to be represented in one of two ways:
                1. As token IDs. This representation will be used with downstream models, where bag-of-word count embedding
                must be done on the fly. If token IDs are provided, we use the bag-of-word-counts embedder to embed these
                tokens during training.
                2. As pre-computed bag of words vectors. This representation will be used during pretraining, where we can
                precompute bag-of-word counts and train much faster.
        epoch_num: ``List[int]``
            Output of epoch tracker
        """
        if self.batch_num in []:
            bp()
        # For easy transfer to the GPU.
        self.device = self.vae.get_beta().device  # pylint: disable=W0201
        # bp()
        output_dict = {}

        self.update_npmi()
        self.update_topics(epoch_num)

        if not self.training:
            self._kld_weight = 1.0  # pylint: disable=W0201
        else:
            self.update_kld_weight(epoch_num)

        # if you supply input as token IDs, embed them into bag-of-word-counts with a token embedder
        if isinstance(tokens, dict):
            embedded_tokens = (self._bag_of_words_embedder(tokens['tokens'])
                               .to(device=self.device))
        else:
            embedded_tokens = tokens
        # embedded_tokens = embedded_tokens.sum(1)
        # Encode the text into a shared representation for both the VAE
        # and downstream classifiers to use.
        # bp()
        encoder_output = self.vae.encoder(embedded_tokens)

        # Perform variational inference.
        variational_output = self.vae(encoder_output)

        # Reconstructed bag-of-words from the VAE with background bias.
        reconstructed_bow = variational_output['reconstruction'] + self._background_freq

        # Apply batchnorm to the reconstructed bag of words.
        # Helps with word variety in topic space.
        reconstructed_bow = self.bow_bn(reconstructed_bow)

        # Reconstruction log likelihood: log P(x | z) = log softmax(z beta + b)
        reconstruction_loss = self.bow_reconstruction_loss(reconstructed_bow, embedded_tokens)

        # KL-divergence that is returned is the mean of the batch by default.
        negative_kl_divergence = variational_output['negative_kl_divergence']

        # Compute ELBO
        elbo = negative_kl_divergence * self._kld_weight + reconstruction_loss

        loss = -torch.mean(elbo)
        open(f"{self.vae._get_name()}_loss.txt", "a+").write(f"{loss} \n")
        if torch.isnan(loss):
            bp()
        output_dict['loss'] = loss
        theta = variational_output['theta']

        # Keep track of internal states for use downstream
        activations: List[Tuple[str, torch.FloatTensor]] = []
        intermediate_input = embedded_tokens
        for layer_index, layer in enumerate(self.vae.encoder._linear_layers):  # pylint: disable=protected-access
            intermediate_input = layer(intermediate_input)
            activations.append((f"encoder_layer_{layer_index}", intermediate_input))

        activations.append(('theta', theta))

        output_dict['activations'] = activations
        # bp()
        # Update metrics
        self.metrics['nkld'](-torch.mean(negative_kl_divergence))
        self.metrics['nll'](-torch.mean(reconstruction_loss))

        # batch_num is tracked for kl weight annealing
        self.batch_num += 1

        self.metrics['npmi'] = self._cur_npmi

        return output_dict
Example #11
        if isfile(resume_ckpt):
            print("=> loading checkpoint '{}'".format(resume_ckpt))
            checkpoint = torch.load(resume_ckpt,
                                    map_location=lambda storage, loc: storage)
            opt.start_epoch = checkpoint['epoch']
            best_metric = checkpoint['best_score']
            model.load_state_dict(checkpoint['state_dict'])
            model = model.to(device)
            if opt.mode == 'train':
                optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume_ckpt, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(resume_ckpt))

    bp()
    if opt.mode.lower() == 'test':
        print('===> Running evaluation step')
        epoch = 1
        recalls = test(whole_test_set, epoch, write_tboard=False)
    elif opt.mode.lower() == 'cluster':
        print('===> Calculating descriptors and clusters')
        get_clusters(whole_train_set)
    elif opt.mode.lower() == 'train':
        print('===> Training model')
        writer = SummaryWriter(log_dir=join(
            opt.runsPath,
            datetime.now().strftime('%b%d_%H-%M-%S') + '_' + opt.arch + '_' +
            opt.pooling))

        # write checkpoints in logdir
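
The keys read above imply a companion save step roughly like the following; this is a sketch that reuses the surrounding script's names (model, optimizer, opt, epoch, best_metric), and the filename is a placeholder, not the project's actual save_checkpoint helper:

# Sketch of the save side implied by the keys loaded above.
import torch
from os.path import join

torch.save({
    'epoch': epoch,
    'state_dict': model.state_dict(),
    'best_score': best_metric,
    'optimizer': optimizer.state_dict(),
}, join(opt.runsPath, 'checkpoint.pth.tar'))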
Example #12
    def forward(self,
                entity_vector: torch.FloatTensor):  # pylint: disable = W0221
        """
        Given the input representation, produces the reconstruction from theta
        as well as the negative KL-divergence, theta itself, and the parameters
        of the distribution.
        """
        output = {}
        # bp()
        # get shape dim for later use
        batch_size, max_num_entity, _ = entity_vector.shape
        # prior -- N(0, 1)
        p_params = {
            "mean": self.p_mu.repeat(batch_size, 1),
            "sigma": self.p_sigma.repeat(batch_size, 1),
            "log_variance": self.p_log_var.repeat(batch_size, 1)
        }

        # estimate persona in bottom-up direction
        s_tilde = self.encoder_entity(entity_vector)
        e_tilde = gumbel_softmax(s_tilde)
        g_tilde = self.pooling_layer(e_tilde, dim=1)
        # g_tilde = (batch_size, P)
        if self.pooling_func == "max":
            g_tilde = g_tilde[0]
        if g_tilde.shape[1] != self.encoder_entity_global.get_input_dim():
            bp()
        g_tilde_hidden = self.encoder_entity_global(g_tilde)
        type_params = self.estimate_params(g_tilde_hidden,
                                           self.mean_projection_type,
                                           self.log_var_projection_type,
                                           self.mean_bn_type,
                                           self.log_var_bn_type)
        # calculate for the distribution for document representation
        # estimate the intermediate document representation
        d = self.reparameterize(type_params)
        theta = self._z_dropout(d)
        theta = torch.softmax(theta, dim=-1)
        output.update({
            "theta": theta,
            "type_params": type_params,
            "type_negative_kl_divergence":
                self.compute_negative_kld(q_params=type_params, p_params=p_params)
        })

        f = self._decoder_type.weight.t()
        if self._stochastic_weight:
            f = torch.nn.functional.softmax(f, dim=1)
        if self._apply_batchnorm_on_decoder:
            f = self.decoder_bn_topic(f)

        # (batch_size, num_type) -> (batch_size, P) = global persona representation
        g = theta @ f
        output["global_persona"] = g
        # decode type representation to persona representation
        # (batch_size, max_num_entity, P) -- equivalent to sampling from multinomial(n=1, p_1, ... p_P)
        persona_proportion = gumbel_softmax(
            g.unsqueeze(1).repeat(1, max_num_entity, 1))

        q_persona_params = {"logit": g_tilde}
        p_persona_params = {"logit": g}

        persona_proportion = self._z_dropout(persona_proportion)
        # bp()
        output.update({
            "persona": persona_proportion,
            "persona_params": q_persona_params,
            "persona_negative_kl_divergence":
                self.compute_negative_kld(q_params=q_persona_params,
                                          p_params=p_persona_params,
                                          type="multinomial")
        })
        # bp()
        # decode persona representation to topic representation
        W = self._decoder_persona.weight.t()
        if self._apply_batchnorm_on_decoder:
            W = self.decoder_bn_persona(W)
        if self._stochastic_weight:
            W = torch.nn.functional.softmax(W, dim=1)
        # bp()
        # persona_reconstruction = topic_proportion calculated from persona proportion
        persona_reconstruction = torch.softmax(persona_proportion @ W, dim=-1)
        output["persona_reconstruction"] = persona_reconstruction

        # decode topic representation(proportion) to distribution over word(unnormalized)
        beta = self._decoder_topic.weight.t()
        if self._apply_batchnorm_on_decoder:
            beta = self.decoder_bn_topic(beta)
        if self._stochastic_weight:
            beta = torch.nn.functional.softmax(beta, dim=1)
        # bp()
        bow_reconstruction = persona_reconstruction @ beta
        output["bow_reconstruction"] = bow_reconstruction

        return output
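
This forward relies on gumbel_softmax to draw differentiable, near-one-hot samples. The project's own gumbel_softmax helper is not shown; torch.nn.functional.gumbel_softmax illustrates the same operation:

# Illustration of the Gumbel-softmax relaxation used above (standard PyTorch version,
# not necessarily identical to the project's own gumbel_softmax helper).
import torch
import torch.nn.functional as F

logits = torch.randn(4, 6, 10)                                # (batch, entities, personas)
soft = F.gumbel_softmax(logits, tau=1.0, dim=-1)              # differentiable, rows sum to 1
hard = F.gumbel_softmax(logits, tau=1.0, hard=True, dim=-1)   # one-hot in the forward pass
print(soft.sum(-1).allclose(torch.ones(4, 6)), hard.sum(-1).unique())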
Example #13
    def forward(self,
                entity_vector: torch.FloatTensor):  # pylint: disable = W0221
        """
        Given the input representation, produces the reconstruction from theta
        as well as the negative KL-divergence, theta itself, and the parameters
        of the distribution.
        """
        output = {}
        batch_size, max_num_entity, _ = entity_vector.shape
        # prior -- N(0, 1)
        p_params = {
            "mean": self.p_mu,
            "sigma": self.p_sigma,
            "log_variance": self.p_log_var
        }

        # bp()
        # estimate persona
        hidden_s = self.encoder_entity(entity_vector)
        # TODO: batchnorm on the entity representations is not used; should we run batchnorm on the global entity representation?
        s_params = self.estimate_params(hidden_s, self.mean_projection_entity,
                                        self.log_variance_projection_entity,
                                        self.mean_bn_entity,
                                        self.log_var_bn_entity)
        s = self.reparameterize(s_params)
        global_s, _ = s.max(1)  # free for other function choices e.g. avg(.)
        hidden_d = self.encoder_topic(global_s)
        d_params = self.estimate_params(hidden_d, self.mean_projection_topic,
                                        self.log_variance_projection_topic,
                                        self.mean_bn_topic,
                                        self.log_var_bn_topic)
        d = self.reparameterize(d_params)
        output.update({
            "d": d,
            "d_params": d_params,
            "d_negative_kl_divergence":
                self.compute_negative_kld(q_params=d_params, p_params=p_params)
        })
        d = d.unsqueeze(1).repeat(1, max_num_entity, 1)
        p_s_params = {
            "mean": d,
            "sigma": torch.ones_like(d),
            "log_variance": torch.zeros_like(d)
        }
        output.update({
            "s": s,
            "s_params": s_params,
            "s_negative_kl_divergence":
                self.compute_negative_kld(q_params=s_params, p_params=p_s_params)
        })

        e = torch.softmax(s, dim=-1)
        beta = self._decoder_persona.weight.t()
        if self._apply_batchnorm_on_decoder:
            beta = self.decoder_bn_persona(beta)
        if self._stochastic_beta:
            beta = torch.nn.functional.softmax(beta, dim=1)
        e_reconstruction = e @ beta
        output["e_reconstruction"] = e_reconstruction
        bp()
        return output
Example #14
 def getIDConf(self):
     if self.checking_return_value() < 0:
         print("Error : vps.py's return value")
         bp()
     return self.vps_IDandConf
Example #15
    def forward(
            self,  # pylint: disable=arguments-differ
            doc: Union[Dict[str, torch.IntTensor], torch.IntTensor],
            entities: Union[Dict[str, torch.IntTensor], torch.IntTensor],
            epoch_num: List[int] = None):
        """
        Parameters
        ----------
        doc: ``Union[Dict[str, torch.IntTensor], torch.IntTensor]``
            A batch of tokens. We expect tokens to be represented in one of two ways:
                1. As token IDs. This representation will be used with downstream models, where bag-of-word count embedding
                must be done on the fly. If token IDs are provided, we use the bag-of-word-counts embedder to embed these
                tokens during training.
                2. As pre-computed bag of words vectors. This representation will be used during pretraining, where we can
                precompute bag-of-word counts and train much faster.
        epoch_num: ``List[int]``
            Output of epoch tracker
        """
        # bp()
        if self.batch_num in []:
            bp()
        # For easy transfer to the GPU.
        self.device = self.vae.get_beta().device  # pylint: disable=W0201

        output_dict = {}

        self.update_npmi()
        self.update_topics_and_personas(epoch_num)

        if not self.training:
            self._kld_weight = 1.0  # pylint: disable=W0201
        else:
            self.update_kld_weight(epoch_num)
        # bp()
        # if you supply input as token IDs, embed them into bag-of-word-counts with a token embedder
        if isinstance(entities, dict):
            embedded_entities = (self._bag_of_words_embedder(
                entities['tokens']).to(device=self.device))
        else:
            embedded_entities = entities
        # Encode the text into a shared representation for both the VAE
        # and downstream classifiers to use.
        # bp()
        variational_output = self.vae(embedded_entities)
        entities_mask = (embedded_entities.sum(-1) != 0).float()
        # bp()
        # Reconstructed bag-of-words from the VAE with background bias.
        # doc_reconstructed_bow = variational_output['doc_reconstruction'] + self._background_freq
        entity_reconstructed_bow = variational_output[
            'bow_reconstruction'] + self._background_freq

        # Apply batchnorm to the reconstructed bag of words.
        # Helps with word variety in topic space.
        # doc_reconstructed_bow = self.doc_bow_bn(doc_reconstructed_bow)
        # entity_reconstructed_bow = self.entity_bow_bn(entity_reconstructed_bow) * entities_mask.unsqueeze(-1)

        # Reconstruction log likelihood: log P(x | z) = log softmax(z beta + b)
        # reconstruction_loss = self.bow_reconstruction_loss(doc_reconstructed_bow, embedded_docs)
        # bp()
        reconstruction_loss = (self.bow_reconstruction_loss(
            entity_reconstructed_bow, embedded_entities) *
                               entities_mask).sum(1)

        # KL-divergence that is returned is the mean of the batch by default.
        doc_negative_kl_divergence = variational_output[
            'type_negative_kl_divergence']
        # masked sum of entity KL-divergence since there are some paddings
        # bp()
        entity_negative_kl_divergence = variational_output[
            "persona_negative_kl_divergence"] * entities_mask.sum(1)
        # total KL-divergence is the sum of doc's KL and entities' KL
        negative_kl_divergence = doc_negative_kl_divergence * self._doc_kld_weight \
                                 + entity_negative_kl_divergence * self._entity_kld_weight
        # Compute ELBO
        elbo = negative_kl_divergence + reconstruction_loss
        # bp()
        loss = -torch.mean(elbo)
        if torch.isnan(loss):
            bp()
        output_dict['loss'] = loss
        # bp()
        # Update metrics
        self.metrics['nkld'](-torch.mean(negative_kl_divergence))
        self.metrics['d_nkld'](-torch.mean(doc_negative_kl_divergence))
        self.metrics['e_nkld'](-torch.mean(entity_negative_kl_divergence))
        self.metrics['nll'](-torch.mean(reconstruction_loss))
        # bp()
        # batch_num is tracked for kl weight annealing
        self.batch_num += 1

        self.metrics['e_npmi'] = self._cur_entity_npmi
        self.metrics['d_npmi'] = self._cur_doc_npmi

        return output_dict
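
A toy illustration of the padding mask built above: all-zero entity rows (padding) get mask 0, so they are excluded from the reconstruction term and do not add to the entity count that scales the KL term:

# Toy version of the entities_mask computation above.
import torch

embedded_entities = torch.tensor([[[1., 0., 2.],    # a real entity bag-of-words row
                                   [0., 0., 0.]]])  # a padded (empty) entity row
entities_mask = (embedded_entities.sum(-1) != 0).float()
print(entities_mask)   # tensor([[1., 0.]])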