Example #1
    def forward_i(self, data, iword_indicator, iword_numerals,
                  iword_numeral_length):
        v = LT(data)
        v = v.cuda() if self.is_cuda else v
        embed = self.ivectors(v)

        # B x T x F
        if iword_numerals.size()[0] == 0:
            return embed

        iword_numerals = iword_numerals.cuda() if self.is_cuda else iword_numerals
        iword_numeral_length = iword_numeral_length.cuda() if self.is_cuda else iword_numeral_length
        iword_numeral_length_permuted, perm_idx = iword_numeral_length.sort(
            0, descending=True)
        iword_numerals_permuted = iword_numerals[perm_idx]
        packed_input = pack_padded_sequence(iword_numerals_permuted,
                                            iword_numeral_length_permuted,
                                            batch_first=True)
        invert_perm_idx = self.invert_permutation(perm_idx)

        # assert t.equal(iword_numerals_permuted[invert_perm_idx], iword_numerals)
        # assert iword_indicator.sum() == iword_numerals.size()[0]

        if self.scheme == 'LSTM':
            _, (hn, cn) = self.digital_RNN_i(packed_input)

        else:
            _, hn = self.digital_RNN_i(packed_input)

        # TODO: how to check?
        embed[iword_indicator] = hn.squeeze(0)[invert_perm_idx]

        return embed
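The snippet leans on a helper `invert_permutation` that is not shown here. A minimal sketch of such a helper (an assumption, not the repository's actual implementation), together with the sort/restore pattern used above:

import torch

def invert_permutation(perm_idx):
    # inverse[perm_idx[i]] = i, so indexing with `inverse` undoes the sort
    inverse = torch.empty_like(perm_idx)
    inverse[perm_idx] = torch.arange(perm_idx.size(0), device=perm_idx.device)
    return inverse

lengths = torch.tensor([2, 5, 3])
lengths_sorted, perm_idx = lengths.sort(0, descending=True)
assert torch.equal(lengths_sorted[invert_permutation(perm_idx)], lengths)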
Example #2
    def forward_o(self, data, owords_indicator, owords_numerals,
                  owords_numeral_length):
        v = LT(data)
        v = v.cuda() if self.ovectors.weight.is_cuda else v
        embed = self.ovectors(v)
        if owords_numerals.size()[0] == 0:
            return embed

        owords_numerals = owords_numerals.cuda() if self.is_cuda else owords_numerals
        owords_numeral_length = owords_numeral_length.cuda() if self.is_cuda else owords_numeral_length
        owords_numeral_length_permuted, perm_idx = owords_numeral_length.sort(
            0, descending=True)
        owords_numerals_permuted = owords_numerals[perm_idx]
        packed_input = pack_padded_sequence(owords_numerals_permuted,
                                            owords_numeral_length_permuted,
                                            batch_first=True)
        invert_perm_idx = self.invert_permutation(perm_idx)

        assert t.equal(owords_numerals_permuted[invert_perm_idx],
                       owords_numerals)
        assert owords_indicator.sum() == owords_numerals.size()[0]

        if self.scheme == 'LSTM':
            _, (hn, cn) = self.digital_RNN_o(packed_input)
        else:
            _, hn = self.digital_RNN_o(packed_input)

        embed[owords_indicator] = hn.squeeze(0)[invert_perm_idx]

        return embed
Example #3
    def decode(self, encoder_outputs: Tensor,
               encoder_output_lengths: Tensor) -> Tensor:
        """
        Decode encoder_outputs.

        Args:
            encoder_outputs (torch.FloatTensor): A output sequence of encoder. `FloatTensor` of size
                ``(batch, seq_length, dimension)``
            encoder_output_lengths (torch.LongTensor): The length of encoder outputs. ``(batch)``

        Returns:
            * predicted_log_probs (torch.FloatTensor): Log probability of model predictions.
        """
        hidden_states, attn = None, None
        outputs = list()

        batch_size = encoder_outputs.size(0)
        input_var = LongTensor([self.sos_id] * batch_size).view(batch_size, 1)

        if torch.cuda.is_available():
            input_var = input_var.cuda()

        for di in range(self.max_length):
            step_outputs, hidden_states, attn = self.forward_step(
                input_var=input_var,
                hidden_states=hidden_states,
                encoder_outputs=encoder_outputs,
                attn=attn,
            )
            input_var = step_outputs.topk(1)[1]
            outputs.append(input_var)

        outputs = torch.stack(outputs, dim=1).squeeze(2)

        return outputs
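A small illustration of the greedy step above: `step_outputs.topk(1)[1]` keeps, for every batch element, the index of the highest-scoring vocabulary entry, which is then fed back as the next decoder input (the tensors below are dummies):

import torch

step_outputs = torch.randn(4, 10)   # (batch, vocab_size) scores for one decoding step
next_ids = step_outputs.topk(1)[1]  # (batch, 1) index of the best entry per sample
assert torch.equal(next_ids, step_outputs.argmax(dim=-1, keepdim=True))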
Example #4
    def predict(self, masked_sentence, fold_case=False):
        """Predict the masked word in `masked_sentence`.

        Note that the output probability distribution is unnormalized.

        Parameters
        ----------
        masked_sentence : str
            Sentence with one token masked out
        fold_case : bool
            Whether or not to average predictions over different casings.

        Returns
        -------
        pd.DataFrame
            The unnormalized probability distribution over BERT's vocab of
            each word in the masked position.

        """
        tokens = START + self.tokenize(masked_sentence) + END
        target_index = tokens.index(MASK)
        token_ids = self.tokens_to_ids(tokens)
        tensor = LongTensor(token_ids).unsqueeze(0)
        if self.gpu:
            tensor = tensor.cuda()
        probs = self.model(tensor)[0][0, target_index]
        if self.gpu:
            probs = probs.cpu()
        probs = pd.DataFrame(probs.data.numpy(),
                             index=self.index,
                             columns=["p"])
        if fold_case:
            probs.index = probs.index.str.lower()
            return probs.groupby("word").mean()
        return probs
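A hypothetical call site for the method above, assuming the surrounding class wraps a BERT masked language model and `[MASK]` is the token recognised by `self.tokenize` (the `predictor` name is illustrative, not from the snippet):

# `predictor` stands for an instance of the class shown above.
probs = predictor.predict("The capital of France is [MASK] .")
print(probs.sort_values("p", ascending=False).head())  # highest unnormalized scores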
Example #5
    def _computer_score(self, emissions: torch.Tensor, tags: torch.LongTensor,
                        mask: torch.ByteTensor) -> torch.Tensor:

        # batch second
        assert emissions.dim() == 3 and tags.dim() == 2
        assert emissions.shape[:2] == tags.shape
        assert emissions.size(2) == self.num_tags
        assert mask.shape == tags.shape
        assert mask[0].all()
        tags = tags.cuda()  # .cuda() is not in-place; keep the returned tensor

        # e.g. seq_length = 62, batch_size = 32
        seq_length, batch_size = tags.shape
        mask = mask.float().cuda()

        # self.start_transitions: scores for going from START to each tag (END excluded)

        # tags[0] has shape (batch_size,); start-transition score for the first
        # token of every sentence (the transition parameters are randomly initialized)
        score = self.start_transitions[tags[0]]
        # code.interact(local = locals())

        # emission score of the first token of every sentence
        score += emissions[0, torch.arange(batch_size), tags[0]]

        for i in range(1, seq_length):  # [1,2,...,seq_length-1]
            # if mask[i].sum() == 0:
            #     break
            # transitions[i][j] is the score of moving from tag i to tag j
            score += self.transitions[tags[i - 1], tags[i]] * mask[i]  # A_{y_{i-1}, y_i}

            score += emissions[i, torch.arange(batch_size), tags[i]] * mask[i]  # P_{i, y_i}

        # Find the tag of the last (non-padded) token of every sample.
        # shape: (batch_size,)  true length of each sequence in the batch
        # .long() casts to integers; .sum(dim=0) counts the non-padded tokens per sentence
        seq_ends = mask.long().sum(dim=0) - 1

        # tag of the last token of every sample
        last_tags = tags[seq_ends, torch.arange(batch_size)]

        # shape: (batch_size,)  add the end-transition score of each sample's last tag
        score += self.end_transitions[last_tags]

        return score
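What the method computes, written out for a single unpadded sequence; a sketch for intuition that mirrors the batched code above:

def crf_score_single(emissions, tags, start_transitions, transitions, end_transitions):
    # emissions: (seq_length, num_tags); tags: (seq_length,) gold tag ids for one sequence
    score = start_transitions[tags[0]] + emissions[0, tags[0]]
    for i in range(1, len(tags)):
        score = score + transitions[tags[i - 1], tags[i]] + emissions[i, tags[i]]
    return score + end_transitions[tags[-1]]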
Example #6
 def wrap(b: torch.LongTensor):
     if b is None:
         return b
     if isinstance(b, (list, tuple)):
         # a list/tuple of tensors has to be stacked before it can be moved
         b = torch.stack(b, 0)
     b = b.contiguous()
     if self.cuda:
         b = b.cuda()
     b = Variable(b, volatile=self.volatile, requires_grad=False)
     return b
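`Variable(..., volatile=...)` is the legacy pre-0.4 PyTorch API; `volatile` was removed in 0.4. On current PyTorch the same wrapper would look roughly like this (a sketch, assuming `self.device` holds the target device and the caller disables autograd with `torch.no_grad()` where needed):

def wrap(self, b):
    if b is None:
        return b
    if isinstance(b, (list, tuple)):
        b = torch.stack(b, 0)  # stack a list of tensors into one tensor
    return b.contiguous().to(self.device)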
Example #7
    def forward_o(self, data, owords_indicator, owords_numerals):
        v = LT(data)
        v = v.cuda(self.ivectors.weight.device) if self.is_cuda else v
        embed = self.ovectors(v)
        if owords_numerals.size()[0] == 0:
            return embed

        numeral_embed = self.get_numeral_embed_batch(owords_numerals)

        # [num_of_numerals x prototype_size ]  x [prototype_size x embedding_size] => [num_of_numeral x embedding_size]
        embed[owords_indicator] = numeral_embed

        return embed
Example #8
    def forward_o(self, data, owords_indicator, owords_numerals):
        v = LT(data)
        v = v.cuda(self.ivectors.weight.device) if self.is_cuda else v
        embed = self.ovectors(v)
        if owords_numerals.size()[0] == 0 or self.gmm_posterior is None:
            return embed

        prototype_weights = self.get_numeral_embed_weights_batch(
            owords_numerals)  # [num_of_numerals x prototype_size]
        numeral_embed = t.matmul(prototype_weights,
                                 self.oprototypes_embeddings)
        # [num_of_numerals x prototype_size] x [prototype_size x embedding_size] => [num_of_numerals x embedding_size]
        embed[owords_indicator] = numeral_embed

        return embed
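The shape arithmetic behind the prototype mixing above, checked on dummy tensors (the sizes are arbitrary; `t` is `torch`, as in the snippet):

import torch as t

num_numerals, prototype_size, embedding_size = 7, 5, 300
prototype_weights = t.rand(num_numerals, prototype_size)  # one mixing row per numeral
prototypes = t.rand(prototype_size, embedding_size)        # learned prototype embeddings
numeral_embed = t.matmul(prototype_weights, prototypes)
assert numeral_embed.shape == (num_numerals, embedding_size)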
Example #9
    def forward(
        self,
        entities: torch.LongTensor,  # [e1, ..., en] : [batch, ent_n]
        relations: torch.LongTensor
    ) -> torch.FloatTensor:  # [s1, ..., sm] : [batch, rel_size]

        assert entities.size()[-1] == relations.size()[-1] - 1, \
            "size of entity list should match relation list"

        if torch.cuda.is_available():
            entities, relations = entities.cuda(), relations.cuda()

        ent_embed = self.e_embedding(entities)  # [batch, len_ent, e_embed]
        rel_embed = self.r_embedding(relations)  # [batch, len_ent - 1, r_embed]

        null_to_cat = self.null.repeat(relations.size()[0], 1, 1)
        rel_embed = self.concat([rel_embed, null_to_cat], dim=1)

        ent_proj = self.W_eh(ent_embed)
        rel_proj = self.W_rh(rel_embed)

        rnn_out, _ = self.RNN(ent_proj + rel_proj)
        return self.sim_score(rnn_out, self.r_embedding.weight)
Example #10
    def forward_i(self, data, iword_indicator, iword_numerals):
        v = LT(data)
        v = v.cuda(self.ivectors.weight.device) if self.is_cuda else v
        embed = self.ivectors(v)

        if iword_numerals.size()[0] == 0:
            return embed

        # prototype_weights = self.get_numeral_embed_weights_batch(iword_numerals) # [ num_of_numerals x prototype_size]
        # numeral_embed = t.matmul(prototype_weights, self.iprototypes_embeddings)
        # [num_of_numerals x prototype_size ]  x [prototype_size x embedding_size] => [num_of_numeral x embedding_size]
        numeral_embed = self.get_numeral_embed_batch(iword_numerals)

        embed[iword_indicator] = numeral_embed

        return embed
Example #11
    def next_target(self, mode, cuda, device_id):
        if mode == TRAIN_MODE: target_id = self.train.next_items(1)[0]
        elif mode == DEV_MODE: target_id = self.dev.next_items(1)[0]
        elif mode == TEST_MODE: target_id = self.test.next_items(1)[0]

        _1d_feature, _2d_feature = get_features(target_id)
        contact_map = read_contact_map(target_id)

        # Convert to FloatTensors
        _1d_feature = FloatTensor(np.expand_dims(_1d_feature, 0))
        _2d_feature = FloatTensor(np.expand_dims(_2d_feature, 0))
        contact_map = LongTensor(np.expand_dims(contact_map, 0))

        if cuda:
            _1d_feature = _1d_feature.cuda(device_id)
            _2d_feature = _2d_feature.cuda(device_id)
            contact_map = contact_map.cuda(device_id)

        return target_id, _1d_feature, _2d_feature, contact_map
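`tensor.cuda(device_id)` pins the tensors to a specific GPU. On recent PyTorch the device-agnostic spelling of the same move would be roughly (a sketch, not part of the original function):

device = torch.device(f"cuda:{device_id}" if cuda else "cpu")
_1d_feature = _1d_feature.to(device)
_2d_feature = _2d_feature.to(device)
contact_map = contact_map.to(device)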
Example #12
    def forward(self, x: torch.LongTensor):
        [batch_size, sent_len] = x.size()
        # pad each sentence with `_context` padding ids on both sides
        padding_ = Variable(
            torch.LongTensor([[self._c_pad] * self._context] * batch_size))
        x = torch.cat((padding_, x, padding_), dim=1)
        if self.gpu:
            x = x.cuda()

        embedding = torch.stack([
            embed(x[:, i:(i + sent_len)])
            for i, embed in enumerate(self.embedding)
        ],
                                dim=3)
        multiple = []

        for i in range(2 * self._context + 1):
            multiple.append(embedding[:, :, :, self._context] *
                            embedding[:, :, :, i])

        # sum the element-wise products over the context window
        multiple = torch.stack(multiple, dim=3).sum(dim=3)

        context = [
            self.context.view(1, self._embed_size, self._attn)
            for _ in range(batch_size)
        ]
        context = torch.cat(context, 0).contiguous()

        multi_rep = F.tanh(self.attn_linear(multiple))

        alpha = torch.bmm(
            multi_rep, context)  # [batch, len, embed] x [batch, embed, attn]
        alpha = torch.softmax(alpha, 1)  # [batch, len, attn]
        alpha = torch.transpose(alpha, 1, 2)  # [batch, attn, len]

        multiple = torch.bmm(alpha, multiple).view(batch_size,
                                                   -1)  # [batch, attn x embed]
        return self.classifier(multiple)
Example #13
    def learn(self, state, action, reward, next_state, done):
        # Memorize experience
        self.memory.append((state, action, reward, next_state, done))
        self.episode_reward += reward
        self.total_steps += 1

        # End of episode
        if done:
            self.num_episode += 1  # Episode counter
            self.logger.log_dict(
                self.total_steps, {
                    'episode_reward': self.episode_reward,
                    'memory_size': len(self.memory),
                })
            self.epsilons.append(self.epsilon)  # Log epsilon value

            # Epsilon decay
            self.epsilon = max(self.epsilon * self.epsilon_decay,
                               self.epsilon_end)
            self.episode_reward = 0

        # Periodically update target network with current one
        if self.num_episode % self.target_update_interval == 0:
            self.target_qnetwork.load_state_dict(self.qnetwork.state_dict())

        # Train when we have enough experiences in the replay memory
        if len(self.memory) > self.batch_size:
            # Sample batch of experience
            batch = random.sample(self.memory, self.batch_size)
            state, action, reward, next_state, done = zip(*batch)

            action = LongTensor(action)
            reward = Tensor(reward)
            done = Tensor(done)

            if torch.cuda.is_available():
                action = action.cuda()
                reward = reward.cuda()
                done = done.cuda()

            # Q-value for current state given current action
            q_values = self.qnetwork(state)
            q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)

            # Compute the TD target
            next_q_values = self.target_qnetwork(next_state)
            next_q_value = next_q_values.max(1)[0]

            td_target = reward + self.gamma * next_q_value * (1 - done)

            # Optimize quadratic loss
            loss = (q_value - td_target.detach()).pow(2).mean()
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            self.logger.log_dict(
                self.total_steps, {
                    'dqn/loss': loss.data.cpu().numpy(),
                    'dqn/reward': reward.mean().data.cpu().numpy(),
                })
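The TD target used above, spelled out on a toy batch; a self-contained sketch (in the real code `reward`, `next_q_value` and `done` come from the replay sample):

import torch

gamma = 0.99
reward = torch.tensor([1.0, 0.0])
next_q_value = torch.tensor([2.0, 3.0])
done = torch.tensor([0.0, 1.0])  # 1.0 marks terminal transitions

# Terminal transitions contribute no bootstrapped future value.
td_target = reward + gamma * next_q_value * (1 - done)
print(td_target)  # tensor([2.9800, 0.0000])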
Example #14
 def forward_2(self, nodes):
     v = LT(nodes.data.numpy())
     v = v.cuda() if self.vectors_2.weight.is_cuda else v
     return self.vectors_2(v)
Example #15
def get_activations(ims,
                    model,
                    batch_size=50,
                    dims=2048,
                    cuda=False,
                    verbose=False):
    """Calculates the activations of the pool_3 layer for all images.
    Params:
    -- ims         : Tensor of input images
    -- model       : Instance of inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the number
                     of calculated batches is reported.
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    # if ims.size(0) % batch_size != 0:
    #     print(('Warning: number of images is not a multiple of the '
    #            'batch size. Some samples are going to be ignored.'))
    # if batch_size > ims.size(0):
    #     print(('Warning: batch size is bigger than the data size. '
    #            'Setting batch size to data size'))
    #     batch_size = ims.size(0)

    n_batches = ims.size(0) // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))

    for i in range(n_batches):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches),
                  end='',
                  flush=True)
        start = i * batch_size
        end = start + batch_size
        cur_index = LongTensor(range(start, end))
        if cuda:
            cur_index = cur_index.cuda()
        batch = index_select(ims, 0, Variable(cur_index))
        pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.shape[2] != 1 or pred.shape[3] != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)

    if verbose:
        print(' done')

    return pred_arr
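A hypothetical call, assuming `ims` is a tensor of preprocessed images of shape (num_images, 3, H, W) and `model` is the Inception instance used for FID (both names come from the signature above; their construction is not shown):

activations = get_activations(ims, model, batch_size=50, dims=2048, cuda=False)
print(activations.shape)  # (n_batches * batch_size, 2048)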
Example #16
    def forward(self,  # type: ignore
                tokens: Dict[str, torch.LongTensor],
                verb_indicator: torch.LongTensor,
                tags: torch.LongTensor = None,
                training: bool = False,  # added by ph to make function consistent with other model
                metadata: List[Dict[str, Any]] = None) -> Dict[str, torch.Tensor]:
        """
        Parameters
        ----------
        tokens : Dict[str, torch.LongTensor], required
            The output of ``TextField.as_array()``, which should typically be passed directly to a
            ``TextFieldEmbedder``. This output is a dictionary mapping keys to ``TokenIndexer``
            tensors.  At its most basic, using a ``SingleIdTokenIndexer`` this is: ``{"tokens":
            Tensor(batch_size, num_tokens)}``. This dictionary will have the same keys as were used
            for the ``TokenIndexers`` when you created the ``TextField`` representing your
            sequence.  The dictionary is designed to be passed directly to a ``TextFieldEmbedder``,
            which knows how to combine different word representations into a single vector per
            token in your input.
        verb_indicator: torch.LongTensor, required.
            An integer ``SequenceFeatureField`` representation of the position of the verb
            in the sentence. This should have shape (batch_size, num_tokens) and importantly, can be
            all zeros, in the case that the sentence has no verbal predicate.
        tags : torch.LongTensor, optional (default = None)
            A torch tensor representing the sequence of integer gold class labels
            of shape ``(batch_size, num_tokens)``
        metadata : ``List[Dict[str, Any]]``, optional, (default = None)
            metadata containing the original words in the sentence and the verb to compute the
            frame for, under 'words' and 'verb' keys, respectively.
        training : added by ph to make function consistent with other model - does nothing

        Returns
        -------
        An output dictionary consisting of:
        logits : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            unnormalised log probabilities of the tag classes.
        class_probabilities : torch.FloatTensor
            A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing
            a distribution of the tag classes per word.
        loss : torch.FloatTensor, optional
            A scalar loss to be optimised.

        """

        # added by ph
        tokens['tokens'] = tokens['tokens'].cuda()
        verb_indicator = verb_indicator.cuda()
        if tags is not None:
            tags = tags.cuda()

        embedded_text_input = self.embedding_dropout(self.text_field_embedder(tokens))
        mask = get_text_field_mask(tokens)
        embedded_verb_indicator = self.binary_feature_embedding(verb_indicator.long())
        # Concatenate the verb feature onto the embedded text. This now
        # has shape (batch_size, sequence_length, embedding_dim + binary_feature_dim).
        embedded_text_with_verb_indicator = torch.cat([embedded_text_input, embedded_verb_indicator], -1)
        batch_size, sequence_length, _ = embedded_text_with_verb_indicator.size()

        encoded_text = self.encoder(embedded_text_with_verb_indicator, mask)

        logits = self.tag_projection_layer(encoded_text)
        reshaped_log_probs = logits.view(-1, self.num_classes)
        class_probabilities = F.softmax(reshaped_log_probs, dim=-1).view([batch_size,
                                                                          sequence_length,
                                                                          self.num_classes])
        output_dict = {"logits": logits, "class_probabilities": class_probabilities, "mask": mask}
        # We need to retain the mask in the output dictionary
        # so that we can crop the sequences to remove padding
        # when we do viterbi inference in self.decode.

        if tags is not None:
            loss = sequence_cross_entropy_with_logits(logits,
                                                      tags,
                                                      mask,
                                                      label_smoothing=self._label_smoothing)
            output_dict["loss"] = loss

        # added by ph
        output_dict['softmax_3d'] = class_probabilities.detach().cpu().numpy()
        return output_dict
Example #17
 def forward_o(self, data):
     v = LT(data)
     v = v.cuda() if self.ovectors.weight.is_cuda else v
     return self.ovectors(v)
Example #18
 def forward_o(self, data):
     v = LT(data)
     v = v.cuda() if self.ovectors.weight.is_cuda else v
     return t.matmul(self.sm(self.ovectors(v)), t.transpose(self.oW, 1, 0))
Example #19
    def get_batches(self, enable_cuda):
        """Create batches from data in class.

        Args:
            enable_cuda (bool): whether to move the batches to the GPU

        Returns:
            list of batches
        """
        # Sort lines by the length of the English sentences
        sorted_lengths = [[
            len(x),
            len(y),
            self.word_positions(x),
            self.word_positions(y), x, y
        ] for x, y in zip(self.lines_e, self.lines_f)]
        sorted_lengths.sort()

        batches = []

        # Go through data in steps of batch size
        for i in range(0,
                       len(sorted_lengths) - self.batch_size, self.batch_size):
            max_french = max(
                [x[1] for x in sorted_lengths[i:i + self.batch_size]])
            max_english = max(
                [x[0] for x in sorted_lengths[i:i + self.batch_size]])
            batch_french = LongTensor(self.batch_size, max_french)
            batch_english = LongTensor(self.batch_size, max_english)
            batch_english_pos = LongTensor(self.batch_size, max_english)
            batch_french_pos = LongTensor(self.batch_size, max_french)

            for j, data in enumerate(sorted_lengths[i:i + self.batch_size]):
                # Map words to indices and pad with EOS tag
                fline = self.pad_list(data[5],
                                      False,
                                      max_french,
                                      pad=self.dict_f.word2index['</s>'])
                eline = self.pad_list(data[4],
                                      True,
                                      max_english,
                                      pad=self.dict_e.word2index['</s>'])

                batch_french[j, :] = LongTensor(fline)
                batch_english[j, :] = LongTensor(eline)

                e_pos = data[2] + [data[2][-1]] * (max_english - len(data[2]))
                f_pos = data[3] + [data[3][-1]] * (max_french - len(data[3]))
                batch_english_pos[j, :] = LongTensor(e_pos)
                batch_french_pos[j, :] = LongTensor(f_pos)

            batch_english = Variable(batch_english)
            batch_english_pos = Variable(batch_english_pos)
            batch_french = Variable(batch_french)
            batch_french_pos = Variable(batch_french_pos)

            if enable_cuda:
                batch_english = batch_english.cuda()
                batch_english_pos = batch_english_pos.cuda()
                batch_french = batch_french.cuda()
                batch_french_pos = batch_french_pos.cuda()

            batches.append((batch_english, batch_english_pos, batch_french))
        random.shuffle(batches)
        return batches
Example #20
    def learn(self, state, action, reward, next_state, done):
        # Memorize experience
        self.memory.append((state, action, reward, next_state, done))
        self.episode_reward += reward
        self.total_steps += 1

        if len(self.priorities) > 0:
            max_priority = np.max(self.priorities)
        else:
            max_priority = 1.0
        self.priorities.append(max_priority)

        # End of episode
        if done:
            self.num_episode += 1  # Episode counter
            self.logger.log_dict(
                self.total_steps, {
                    'episode_reward': self.episode_reward,
                    'memory_size': len(self.memory),
                })
            self.epsilons.append(self.epsilon)  # Log epsilon value

            # Epsilon decay
            self.epsilon = max(self.epsilon * self.epsilon_decay,
                               self.epsilon_end)
            self.episode_reward = 0

        # Periodically update target network with current one
        if self.num_episode % self.target_update_interval == 0:
            self.target_qnetwork.load_state_dict(self.qnetwork.state_dict())

        # Train when we have enough experiences in the replay memory
        if len(self.memory) > self.batch_size:
            prios = np.array(self.priorities)
            probs = prios**self.alpha
            probs /= probs.sum()

            # Sample batch of experience
            indices = np.random.choice(len(self.memory),
                                       self.batch_size,
                                       p=probs)
            batch = [self.memory[idx] for idx in indices]
            state, action, reward, next_state, done = zip(*batch)

            # Importance sampling
            total = len(self.memory)
            weights = (total * probs[indices])**(-self.beta)
            weights /= weights.max()
            weights = np.array(weights, dtype=np.float32)

            action = LongTensor(action)
            reward = Tensor(reward)
            done = Tensor(done)
            weights = Tensor(weights)

            if torch.cuda.is_available():
                action = action.cuda()
                reward = reward.cuda()
                done = done.cuda()
                weights = weights.cuda()

            # Q-value for current state given current action
            q_values = self.qnetwork(state)
            q_value = q_values.gather(1, action.unsqueeze(1)).squeeze(1)

            # Compute the TD target
            next_q_values = self.target_qnetwork(next_state)
            next_q_value = next_q_values.max(1)[0]

            td_target = reward + self.gamma * next_q_value * (1 - done)

            # Optimize quadratic loss
            loss = (q_value - td_target.detach()).abs()

            # We use the individual losses as priorities
            priorities = loss + 1e-5
            for idx, prio in zip(indices, priorities):
                self.priorities[idx] = prio.item()

            # Optimize Q-network as usual
            loss = (loss * weights).pow(2).mean()
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            self.logger.log_dict(
                self.total_steps, {
                    'dqn/loss': loss.data.cpu().numpy(),
                    'dqn/reward': reward.mean().data.cpu().numpy(),
                })
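The prioritized-replay weighting above in isolation: priorities are raised to `alpha` to control how strongly high-error transitions are favored, and the importance-sampling weights undo the resulting bias (a small numeric sketch):

import numpy as np

alpha, beta = 0.6, 0.4
priorities = np.array([0.5, 1.0, 2.0, 4.0])

probs = priorities ** alpha
probs /= probs.sum()                  # sampling distribution over the replay memory

total = len(priorities)
weights = (total * probs) ** (-beta)  # importance-sampling correction
weights /= weights.max()              # scale so the largest weight is 1.0
print(probs.round(3), weights.round(3))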