Exemplo n.º 1
0
    def _recently_added(self, bl, user):
        """ Filtering key point: recently added.
            Looks at the isbns that `bl` reports after the moment 30 days ago.
            When at least self.NEXT_LIMIT of them exist, a random sample of
            self.NEXT_LIMIT is returned; otherwise the first self.NEXT_LIMIT
            isbns of the interested list, ordered by updated time, are returned.
            @param bl: the booklist queried through isbns_after().
            @param user: the user handed to SortHelper for the fallback.
            @returns: a list of isbns
        """
        cutoff = datetime.datetime.now() - datetime.timedelta(days=30)
        recent_isbns = bl.isbns_after(cutoff)

        if len(recent_isbns) < self.NEXT_LIMIT:
            # not enough recent books for a random pick: fall back to the
            # interested list ordered by updated time
            by_time = SortHelper(user).by_updated_time(booklist.LIST_INTERESTED)
            return by_time[:self.NEXT_LIMIT]

        # enough recent books: pick randomly among them
        return random.sample(recent_isbns, self.NEXT_LIMIT)
Exemplo n.º 2
0
    def _fetch_parse(self, user, list_type=None):
        """ Fetch one booklist of `user` from Douban and merge it into the datastore.
            It may cause problems to fetch and then parse, so both are done together.
            @param user: the user whose booklist is imported.
            @param list_type: which booklist to import. When None, it is read
                              from the 'list_type' request parameter; if that
                              is empty as well, nothing is done.
            @returns: None. Errors are logged (logging + self._log), not raised.
        """
        if list_type is None:
            list_type = self.request.get('list_type')
            if not list_type:
                return

        try:
            datas = douban.get_book_list(user, list_type)
        except utils.errors.ParseJsonError as err:
            # Let logging format the arguments: concatenating user.key() with
            # '+' raises TypeError when it is not a str, which would hide the
            # original error.
            logging.error("ERROR while importing from Douban, user_key: %s list_type: %s",
                          user.key(), list_type)
            logging.error(err)
            self._log(err)
            return

        bl = BookList.get_or_create(user, list_type)
        bl.start_importing(len(datas))
        # also clear those in memcache
        helper = SortHelper(user)
        helper.clear(list_type)

        for related in datas:
            # also added into memcache in merge_into_datastore()
            b = related.merge_into_datastore(user, update_book=False)
            if b:
                # when already such book there, b will be None
                try:
                    # use a separate name so the list being iterated is not shadowed
                    url, tj_datas = tongji.get_by_isbn(b.isbn)
                    b.set_tongji_info(url, tj_datas)
                except Exception as err:
                    # best effort: a failure for one book must not abort the import
                    logging.error("ERROR while saving TJ info, isbn: %s", b.isbn)
                    logging.error(err)
                    self._log(err)

        # after all, finish importing: get the list again and reset douban_amount
        bl = BookList.get_or_create(user, list_type)
        bl.douban_amount = None
        bl.put()
Exemplo n.º 3
0
    def _being_praised(self, user):
        """ Filtering by calculating scores of each book.
            Every book in the interested list gets a partly random score built
            from its public rating, the amount of votes, optionally its pages,
            and whether it is tagged as a goal of this week / this month.
            @param user: the user whose lists and tags are inspected.
            @returns: a list of at most self.NEXT_LIMIT isbns, best score first.
        """
        def _rating_weight(rating_score):
            """ Return the relative weight of a rating score.
                @param rating_score: In 10 points scale.
                @returns: a base in [-2, 2] plus a random value in [0, 1).
            """
            if rating_score < 6.0:
                base = -2
            elif rating_score < 7.0:
                base = -1
            elif rating_score < 7.5:
                base = 0
            elif rating_score < 7.8:
                base = 0.3
            elif rating_score < 8.0:
                base = 0.6
            elif rating_score < 8.3:
                base = 0.9
            elif rating_score < 8.5:
                base = 1.2
            elif rating_score < 8.8:
                base = 1.5
            elif rating_score < 9.0:
                base = 1.8
            else:
                base = 2
            return base + random.random()

        def _voted_weight(amount):
            """ @returns: the relative weight of rated_amount.
                Generally, the larger it is, the more its rating can present.
            """
            if amount <= 0:
                base = -2
            elif amount < 64:
                base = -1
            else:
                # too many is also useless: cap the amount at 10000, so the
                # base never exceeds log10(10000) == 4
                amount = min(amount, 10000)
                base = log10(amount)
            return base + random.random()

        def _pages_weight(pages):
            """ @returns: the relative weight of pages of a book.
                Generally, if pages is larger than a threshold, it becomes bad.
            """
            if pages > self.PAGE_THRESHHOLD:
                return -1
            else:
                return 1

        helper = SortHelper(user)
        now = datetime.datetime.now()
        dt = datetime.timedelta(days=30)
        one_month_ago = now - dt

        # Pages are taken into account only when a book of the done list was
        # updated within the last 30 days and exceeds the page threshold.
        consider_pages = any(
            d.updated_time >= one_month_ago and d.pages > self.PAGE_THRESHHOLD
            for d in helper.all(booklist.LIST_DONE))

        tag_helper = TagHelper(user)
        # one set for both goal tags, so each membership test is a single lookup
        goal_isbns = set(tag_helper.isbns('thisweek'))
        goal_isbns.update(tag_helper.isbns('thismonth'))

        def _calculate(data):
            """ Calculate the score of that data.
                Pages are taken into account only when the enclosing
                `consider_pages` flag is set.
            """
            r = _rating_weight(data.public_rating)
            v = _voted_weight(data.rated_amount)
            # NOTE(review): two negative weights multiply into a positive
            # score -- confirm that this is intended.
            score = r * v
            if consider_pages:
                score += _pages_weight(data.pages)
            if data.isbn in goal_isbns:
                score += 16 * random.random()
            return score

        datas = helper.all(booklist.LIST_INTERESTED)
        sorted_datas = sorted(datas, key=_calculate, reverse=True)
        return [d.isbn for d in sorted_datas[:self.NEXT_LIMIT]]