Пример #1
0
    def lookup_negative_items(self, negative_users, **kwargs):
        """Draw one negative item id for every entry of `negative_users`.

        For each user a random rank is drawn among that user's negatives
        (`self._num_items` minus the user's positive count), and the rank is
        then converted into an item id with the help of
        `self._sorted_train_pos_items` and `self._total_negatives`.

        NOTE(review): the arithmetic below reads `self._total_negatives[i]`
        as the number of negative item ids preceding the positive stored at
        `self._sorted_train_pos_items[i]`, and `self.index_bounds[u]` /
        `self.index_bounds[u + 1]` as the bounds of user `u`'s slice of those
        arrays -- confirm against the code that builds them.

        Args:
          negative_users: Array of user ids. It is used to index
            `self.index_bounds`, and its shape is the shape of the result.
          **kwargs: Accepted but not used by this method.

        Returns:
          An array shaped like `negative_users` holding the chosen item ids.
        """
        # Every slot starts at -1 (zeros minus one); the closing assertion
        # requires all entries to be non-negative.
        item_ids = np.zeros(shape=negative_users.shape,
                            dtype=rconst.ITEM_DTYPE) - 1

        # Inclusive [lo, hi] index range for each requested user.
        lo = self.index_bounds[negative_users]
        hi = self.index_bounds[negative_users + 1] - 1

        positives_per_user = hi - lo + 1
        negatives_per_user = self._num_items - positives_per_user
        negative_rank = stat_utils.very_slightly_biased_randint(
            negatives_per_user)

        # Fast path:
        # When the drawn rank is at least the tally at the user's last
        # positive, no bisection is required and the item id is just
        #   last_positive_item + 1 + (rank - last_negative_tally)
        # A symmetric fast path exists for ranks below the tally at the first
        # positive (the item id is then the rank itself).
        #
        # MovieLens assigns low integers to popular movies (and preprocessing
        # keeps that ordering), so the first fast path lets ~60% of samples
        # skip the bisection, whereas the second would fire for <0.02% of
        # samples and is therefore not implemented.
        last_tally = self._total_negatives[hi]
        past_last_positive = negative_rank >= last_tally
        item_ids[past_last_positive] = (
            self._sorted_train_pos_items[hi] + 1 +
            (negative_rank - last_tally))[past_last_positive]

        if np.all(past_last_positive):
            # Nothing is left to bisect, and the bisection below is ill-posed
            # on an empty selection.
            return item_ids

        # Restrict the working arrays to the entries that still need a search.
        needs_bisect = np.logical_not(past_last_positive)
        lo = lo[needs_bisect]
        hi = hi[needs_bisect]
        negative_rank = negative_rank[needs_bisect]

        # One shared step count, sized for the widest remaining range.
        steps_per_entry = np.ceil(np.log2(positives_per_user[needs_bisect]))
        num_steps = np.max(steps_per_entry.astype(np.int32))

        for _ in range(num_steps):
            mid = (lo + hi) // 2
            tally_above_rank = self._total_negatives[mid] > negative_rank
            tally_at_or_below_rank = np.logical_not(tally_above_rank)

            hi[tally_above_rank] = mid[tally_above_rank]
            lo[tally_at_or_below_rank] = mid[tally_at_or_below_rank]

        # Expected state once the bisection has finished:
        #   `hi` is the smallest index whose tally is greater than the drawn
        #   rank.

        assert np.all((hi - lo) <= 1)

        item_ids[needs_bisect] = (
            self._sorted_train_pos_items[hi] -
            (self._total_negatives[hi] - negative_rank))

        assert np.all(item_ids >= 0)

        return item_ids
Пример #2
0
  def lookup_negative_items(self, negative_users, **kwargs):
    """Draw one negative item id for every entry of `negative_users`.

    For each user a random rank is drawn among that user's negatives
    (`self._num_items` minus the user's positive count), and the rank is then
    converted into an item id with the help of `self._sorted_train_pos_items`
    and `self._total_negatives`.

    NOTE(review): the arithmetic below reads `self._total_negatives[i]` as the
    number of negative item ids preceding the positive stored at
    `self._sorted_train_pos_items[i]`, and `self.index_bounds[u]` /
    `self.index_bounds[u + 1]` as the bounds of user `u`'s slice of those
    arrays -- confirm against the code that builds them.

    Args:
      negative_users: Array of user ids. It is used to index
        `self.index_bounds`, and its shape is the shape of the result.
      **kwargs: Accepted but not used by this method.

    Returns:
      An array shaped like `negative_users` holding the chosen item ids.
    """
    # Every slot starts at -1 (zeros minus one); the closing assertion
    # requires all entries to be non-negative.
    item_ids = np.zeros(shape=negative_users.shape, dtype=rconst.ITEM_DTYPE) - 1

    # Inclusive [lo, hi] index range for each requested user.
    lo = self.index_bounds[negative_users]
    hi = self.index_bounds[negative_users + 1] - 1

    positives_per_user = hi - lo + 1
    negatives_per_user = self._num_items - positives_per_user
    negative_rank = stat_utils.very_slightly_biased_randint(negatives_per_user)

    # Fast path:
    # When the drawn rank is at least the tally at the user's last positive,
    # no bisection is required and the item id is just
    #   last_positive_item + 1 + (rank - last_negative_tally)
    # A symmetric fast path exists for ranks below the tally at the first
    # positive (the item id is then the rank itself).
    #
    # MovieLens assigns low integers to popular movies (and preprocessing
    # keeps that ordering), so the first fast path lets ~60% of samples skip
    # the bisection, whereas the second would fire for <0.02% of samples and
    # is therefore not implemented.
    last_tally = self._total_negatives[hi]
    past_last_positive = negative_rank >= last_tally
    item_ids[past_last_positive] = (
        self._sorted_train_pos_items[hi] + 1 +
        (negative_rank - last_tally)
    )[past_last_positive]

    if np.all(past_last_positive):
      # Nothing is left to bisect, and the bisection below is ill-posed on an
      # empty selection.
      return item_ids

    # Restrict the working arrays to the entries that still need a search.
    needs_bisect = np.logical_not(past_last_positive)
    lo = lo[needs_bisect]
    hi = hi[needs_bisect]
    negative_rank = negative_rank[needs_bisect]

    # One shared step count, sized for the widest remaining range.
    steps_per_entry = np.ceil(np.log2(positives_per_user[needs_bisect]))
    num_steps = np.max(steps_per_entry.astype(np.int32))

    for _ in range(num_steps):
      mid = (lo + hi) // 2
      tally_above_rank = self._total_negatives[mid] > negative_rank
      tally_at_or_below_rank = np.logical_not(tally_above_rank)

      hi[tally_above_rank] = mid[tally_above_rank]
      lo[tally_at_or_below_rank] = mid[tally_at_or_below_rank]

    # Expected state once the bisection has finished:
    #   `hi` is the smallest index whose tally is greater than the drawn rank.

    assert np.all((hi - lo) <= 1)

    item_ids[needs_bisect] = (
        self._sorted_train_pos_items[hi] -
        (self._total_negatives[hi] - negative_rank)
    )

    assert np.all(item_ids >= 0)

    return item_ids
Пример #3
0
 def lookup_negative_items(self, negative_users, **kwargs):
     """Pick one entry of `self._negative_table` for each requested user.

     Args:
       negative_users: User ids, used to index `self._per_user_neg_count`
         and the first axis of `self._negative_table`.
       **kwargs: Accepted but not used by this method.

     Returns:
       `self._negative_table[negative_users, column]`, where `column` is
       drawn by `stat_utils.very_slightly_biased_randint` from each user's
       entry in `self._per_user_neg_count`.
     """
     # Per-user value handed to the random draw (presumably the number of
     # valid columns in that user's table row -- confirm with the builder).
     per_user_counts = self._per_user_neg_count[negative_users]
     column = stat_utils.very_slightly_biased_randint(per_user_counts)
     return self._negative_table[negative_users, column]
Пример #4
0
 def lookup_negative_items(self, negative_users, **kwargs):
   """Pick one entry of `self._negative_table` for each requested user.

   Args:
     negative_users: User ids, used to index `self._per_user_neg_count` and
       the first axis of `self._negative_table`.
     **kwargs: Accepted but not used by this method.

   Returns:
     `self._negative_table[negative_users, column]`, where `column` is drawn
     by `stat_utils.very_slightly_biased_randint` from each user's entry in
     `self._per_user_neg_count`.
   """
   # Per-user value handed to the random draw (presumably the number of valid
   # columns in that user's table row -- confirm with the builder).
   per_user_counts = self._per_user_neg_count[negative_users]
   column = stat_utils.very_slightly_biased_randint(per_user_counts)
   return self._negative_table[negative_users, column]