Exemplo n.º 1
0
async def update_all_books():
    """Refresh the latest-chapter data for every book on any user's shelf.

    Walks all ``user_message`` documents, reads each shelf entry's
    ``book_url`` and calls ``get_the_latest_chapter`` once per distinct URL,
    allowing each call at most 20 seconds.

    :return: ``False`` when the scan itself fails (the error is logged),
        otherwise ``None``. A failure on a single URL is logged and skipped.
    """
    try:
        motor_db = MotorBase().db
        # Cursor over every user's shelf links; only book_url is projected.
        books_url_cursor = motor_db.user_message.find({}, {
            'books_url.book_url': 1,
            '_id': 0
        })
        # URLs already handled in this run, so a shared book is fetched once.
        already_urls = set()
        async for document in books_url_cursor:
            if not document:
                continue
            for book_url in document['books_url']:
                chapter_url = book_url['book_url']
                if chapter_url in already_urls:
                    continue
                try:
                    # async-timeout 4.0 removed the plain ``with`` form;
                    # ``async with`` is the supported spelling.
                    async with async_timeout.timeout(20):
                        await get_the_latest_chapter(chapter_url)
                except Exception as e:
                    LOGGER.exception(e)
                # Marked as handled even on failure so it is not retried
                # within the same run.
                already_urls.add(chapter_url)
    except Exception as e:
        LOGGER.exception(e)
        return False
Exemplo n.º 2
0
class ZHNovelInfoSpider(Spider):
    """Spider that stores the detail page of a novel in ``all_novels_info``."""

    start_urls = []
    request_config = {'RETRIES': 3, 'DELAY': 2, 'TIMEOUT': 10}
    motor_db = MotorBase(loop=loop).get_db()

    async def parse(self, res):
        """Extract the novel fields from ``res.html`` and upsert them.

        The record is keyed by novel name plus the ``'zongheng'`` spider tag.

        :param res: response object exposing ``html`` and ``url``
        """
        item = await ZHNovelInfoItem.get_item(html=res.html)

        item_data = dict(
            novel_name=item.novel_name,
            author=item.author,
            cover=item.cover,
            abstract=item.abstract,
            status=item.status,
            novels_type=item.novels_type,
            novel_chapter_url=item.novel_chapter_url,
            target_url=res.url,
            spider='zongheng',
            updated_at=time.strftime("%Y-%m-%d %X", time.localtime()),
        )

        print('获取 {} 小说信息成功'.format(item_data['novel_name']))
        print(item_data)

        selector = {
            'novel_name': item_data['novel_name'],
            'spider': 'zongheng',
        }
        await self.motor_db.all_novels_info.update_one(
            selector, {'$set': item_data}, upsert=True)
Exemplo n.º 3
0
async def owllook_add_bookmark(request):
    """
    Add a bookmark for the user stored in the session.

    :param request: request whose session may hold ``user`` and whose
        form-encoded body carries ``bookmarkurl``
    :return: JSON ``status``
        :   -1  no user in the session (or no bookmarkurl sent); log in again
        :   0   adding the bookmark failed
        :   1   bookmark added
    """
    user = request['session'].get('user', None)
    form = parse_qs(str(request.body, encoding='utf-8'))
    bookmarkurl = form.get('bookmarkurl', '')
    if not (user and bookmarkurl):
        return json({'status': -1})

    url = unquote(bookmarkurl[0])
    now = get_time()
    try:
        motor_db = MotorBase().db
        # Touch (or create) the user's record first.
        res = await motor_db.user_message.update_one(
            {'user': user},
            {'$set': {'last_update_time': now}},
            upsert=True)
        if not res:
            return None
        # The $ne filter keeps an already stored URL from being pushed again.
        new_entry = {'bookmark': url, 'add_time': now}
        await motor_db.user_message.update_one(
            {'user': user, 'bookmarks.bookmark': {'$ne': url}},
            {'$push': {'bookmarks': new_entry}})
        LOGGER.info('书签添加成功')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
Exemplo n.º 4
0
async def get_tag():
    """Rebuild every user's tag profile from the books on their shelf.

    For each ``user_message`` document that has shelf entries, the novel name
    is read from the ``novels_name`` query parameter of each ``book_url`` and
    looked up through ``Novels.search_name``. The names, the authors and the
    ``#``-separated ``novels_type`` values are collected, upserted into
    ``user_tag`` under the user's name, and pretty-printed.

    Shelf entries whose URL carries no ``novels_name`` parameter are skipped
    instead of aborting the whole run with an ``IndexError``.
    """
    motor_db = MotorBase().get_db()
    novels = Novels()
    # Cursor over every user's shelf links.
    books_url_cursor = motor_db.user_message.find(
        {}, {'books_url.book_url': 1, 'user': 1, '_id': 0})
    async for document in books_url_cursor:
        books_url = document.get('books_url', None) if document else None
        if not books_url:
            continue
        user = document['user']
        user_novels, user_tag, user_author = [], [], []
        for book_url in books_url:
            chapter_url = book_url['book_url']
            names = parse_qs(urlparse(chapter_url).query).get('novels_name')
            if not names:
                # Nothing to look up for this entry.
                continue
            novels_name = names[0]
            user_novels.append(novels_name)
            novels_info = novels.search_name(novels_name)
            if novels_info:
                user_author.append(novels_info['author'])
                user_tag.extend(novels_info['novels_type'].split('#'))
        data = {
            'user_novels': user_novels,
            'user_tag': user_tag,
            'user_author': user_author,
        }
        await motor_db.user_tag.update_one(
            {"user": user},
            {'$set': {'data': data, "updated_at": get_time()}}, upsert=True)
        pprint(data)
Exemplo n.º 5
0
async def owllook_delete_bookmark(request):
    """
    Remove a bookmark of the user stored in the session.

    :param request: request whose session may hold ``user`` and whose
        form-encoded body carries ``bookmarkurl``
    :return: JSON ``status``
        :   -1  no user in the session (or no bookmarkurl sent); log in again
        :   0   deleting the bookmark failed
        :   1   bookmark deleted
    """
    user = request['session'].get('user', None)
    form = parse_qs(str(request.body, encoding='utf-8'))
    bookmarkurl = form.get('bookmarkurl', '')
    if not (user and bookmarkurl):
        return json({'status': -1})

    bookmark = unquote(bookmarkurl[0])
    try:
        motor_db = MotorBase().db
        # $pull drops every array element whose bookmark field matches.
        removal = {'$pull': {'bookmarks': {"bookmark": bookmark}}}
        await motor_db.user_message.update_one({'user': user}, removal)
        LOGGER.info('删除书签成功')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
Exemplo n.º 6
0
async def similar_user(request):
    """Render the "similar readers" page for the user in the session.

    Redirects to ``/`` when no user is in the session or when the lookup
    fails. Otherwise renders ``similar_user.html``, adding the stored
    ``similar_user``, ``user_tag`` and ``updated_at`` values when a
    ``user_recommend`` record exists for the user.
    """
    user = request['session'].get('user', None)
    if not user:
        return redirect('/')

    motor_db = MotorBase().db
    try:
        similar_info = await motor_db.user_recommend.find_one(
            {'user': user})
        extra = {}
        if similar_info:
            extra = dict(
                similar_user=similar_info['similar_user'],
                user_tag=similar_info['user_tag'],
                updated_at=similar_info['updated_at'],
            )
        # NOTE(review): is_similar stays 1 even when no record was found,
        # exactly as before - confirm whether the template expects 0 there.
        return template('similar_user.html',
                        title='与' + user + '相似的书友',
                        is_login=1,
                        is_similar=1,
                        user=user,
                        **extra)
    except Exception as e:
        LOGGER.error(e)
        return redirect('/')
Exemplo n.º 7
0
async def owllook_add_book(request):
    """
    Put a novel on the shelf of the user stored in the session.

    :param request: request whose session may hold ``user`` and whose
        form-encoded body carries ``novels_name``, ``chapter_url`` and,
        optionally, ``last_read_url``
    :return: JSON ``status``
        :   -1  no user in the session (or a required field is missing)
        :   0   adding to the shelf failed
        :   1   added to the shelf
    """
    user = request['session'].get('user', None)
    data = parse_qs(str(request.body, encoding='utf-8'))
    novels_name = data.get('novels_name', '')
    chapter_url = data.get('chapter_url', '')
    last_read_url = data.get('last_read_url', '')
    if not (user and novels_name and chapter_url):
        return json({'status': -1})

    url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
        chapter_url=chapter_url[0], novels_name=novels_name[0])
    now = get_time()
    # last_read_url is not part of the required fields above, so its absence
    # must not raise; an empty string is stored instead.
    last_read = unquote(last_read_url[0]) if last_read_url else ''
    try:
        motor_db = MotorBase().db
        # Touch (or create) the user's record first.
        res = await motor_db.user_message.update_one(
            {'user': user},
            {'$set': {'last_update_time': now}},
            upsert=True)
        if not res:
            # Always answer the client instead of falling through with None.
            return json({'status': 0})
        # The $ne filter keeps an already shelved URL from being pushed again.
        await motor_db.user_message.update_one(
            {'user': user, 'books_url.book_url': {'$ne': url}},
            {'$push': {
                'books_url': {
                    'book_url': url,
                    'add_time': now,
                    'last_read_url': last_read,
                }
            }})
        LOGGER.info('书架添加成功')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
Exemplo n.º 8
0
async def owllook_delete_book(request):
    """
    Remove a novel from the shelf of the user stored in the session.

    The shelf entry is identified either by ``book_url`` directly or by the
    pair ``novels_name`` + ``chapter_url``, from which the URL is rebuilt.

    :param request: request whose session may hold ``user`` and whose
        form-encoded body identifies the shelf entry
    :return: JSON ``status``
        :   -1  no user in the session; log in again
        :   0   deleting failed (including a request that names no book)
        :   1   removed from the shelf
    """
    user = request['session'].get('user', None)
    data = parse_qs(str(request.body, encoding='utf-8'))
    if not user:
        return json({'status': -1})

    book_urls = data.get('book_url', None)
    if book_urls:
        book_url = book_urls[0]
    else:
        novels_name = data.get('novels_name', '')
        chapter_url = data.get('chapter_url', '')
        if not (novels_name and chapter_url):
            # Without these the URL cannot be rebuilt; indexing the empty
            # default used to raise an unhandled IndexError here.
            return json({'status': 0})
        book_url = "/chapter?url={chapter_url}&novels_name={novels_name}".format(
            chapter_url=chapter_url[0], novels_name=novels_name[0])
    try:
        motor_db = MotorBase().db
        # $pull drops every shelf element whose book_url matches.
        await motor_db.user_message.update_one(
            {'user': user},
            {'$pull': {'books_url': {"book_url": unquote(book_url)}}})
        LOGGER.info('删除书架成功')
        return json({'status': 1})
    except Exception as e:
        LOGGER.exception(e)
        return json({'status': 0})
Exemplo n.º 9
0
async def books(request):
    """Render the shelf page for the user stored in the session.

    Redirects to ``/`` when no user is in the session or when anything fails.
    For every shelf entry the latest chapter is taken from the
    ``latest_chapter`` collection, falling back to ``get_the_latest_chapter``
    when nothing is stored. Entries are handed to the template in reverse
    stored order.
    """
    user = request['session'].get('user', None)
    if not user:
        return redirect('/')

    motor_db = MotorBase().db
    try:
        data = await motor_db.user_message.find_one({'user': user})
        shelf = data.get('books_url', None) if data else None
        if not shelf:
            return template('admin_books.html',
                            title='{user}的书架 - owllook'.format(user=user),
                            is_login=1,
                            user=user,
                            is_bookmark=0)

        result = []
        for entry in shelf:
            book_url = entry.get('book_url', None)
            last_read_url = entry.get("last_read_url", "")
            book_query = parse_qs(urlparse(book_url).query)
            # The whole last-read URL is fed to parse_qs as it was before.
            last_read_chapter_name = parse_qs(last_read_url).get(
                'name', ['暂无'])[0]
            names = book_query.get('novels_name', '')
            novels_name = names[0] if names else ''

            latest_data = await motor_db.latest_chapter.find_one(
                {'owllook_chapter_url': book_url})
            if latest_data:
                stored = latest_data['data']
                latest_chapter_name = stored['latest_chapter_name']
                owllook_content_url = stored['owllook_content_url']
            else:
                fetched = await get_the_latest_chapter(book_url) or {}
                latest_chapter_name = fetched.get(
                    'latest_chapter_name', '暂未获取,请反馈')
                owllook_content_url = fetched.get('owllook_content_url', '')

            result.append({
                'novels_name': novels_name,
                'book_url': book_url,
                'latest_chapter_name': latest_chapter_name,
                'owllook_content_url': owllook_content_url,
                'add_time': entry.get('add_time', ''),
                # Fall back to the book page when nothing was read yet.
                "last_read_url": last_read_url or book_url,
                "last_read_chapter_name": last_read_chapter_name,
            })

        return template(
            'admin_books.html',
            title='{user}的书架 - owllook'.format(user=user),
            is_login=1,
            user=user,
            is_bookmark=1,
            result=result[::-1])
    except Exception as e:
        LOGGER.error(e)
        return redirect('/')
Exemplo n.º 10
0
async def get_user_tag():
    """Compute and store, for every user with tags, their similar users.

    Loads each user's ``data.user_tag`` list from ``user_tag``, compares it
    against all other users through ``CosineSimilarity``, pretty-prints the
    outcome and upserts it into ``user_recommend``.
    """
    motor_db = MotorBase().get_db()
    projection = {'data.user_tag': 1, 'user': 1, '_id': 0}
    user_tag_cursor = motor_db.user_tag.find({}, projection)

    result = {}
    async for document in user_tag_cursor:
        tags = document['data']['user_tag']
        if tags:
            # Dots in the user name are swapped for a middle dot.
            result[document['user'].replace('.', '·')] = tags

    for key, value in result.items():
        # Everyone except the current user, on an independent copy.
        others = deepcopy(result)
        others.pop(key)
        cos = CosineSimilarity(value, others)
        similar = cos.calculate(cos.create_vector())
        pprint(similar)
        update = {
            '$set': {
                'similar_user': similar,
                'user_tag': result[key],
                "updated_at": get_time()
            }
        }
        await motor_db.user_recommend.update_one(
            {"user": key}, update, upsert=True)
Exemplo n.º 11
0
async def update_all_books(loop, timeout=15):
    """Refresh the latest-chapter data for every book on any user's shelf.

    Each distinct ``book_url`` found in ``user_message`` is passed once to
    ``get_the_latest_chapter`` together with ``loop`` and ``timeout``.

    :param loop: forwarded unchanged to ``get_the_latest_chapter``
    :param timeout: forwarded unchanged to ``get_the_latest_chapter``
    :return: ``False`` when the scan itself fails (the error is logged),
        otherwise ``None``. A failure on a single URL is logged and skipped.
    """
    try:
        motor_db = MotorBase().get_db()
        # Cursor over every user's shelf links; only book_url is projected.
        projection = {'books_url.book_url': 1, '_id': 0}
        books_url_cursor = motor_db.user_message.find({}, projection)
        seen = set()
        async for document in books_url_cursor:
            if not document:
                continue
            for shelf_entry in document['books_url']:
                chapter_url = shelf_entry['book_url']
                if chapter_url in seen:
                    continue
                try:
                    await get_the_latest_chapter(chapter_url, loop, timeout)
                except Exception as e:
                    LOGGER.exception(e)
                seen.add(chapter_url)
    except Exception as e:
        LOGGER.exception(e)
        return False
Exemplo n.º 12
0
class ZHNovelsSpider(Spider):
    """Spider that stores the novels of a listing page in ``all_novels``."""

    start_urls = [
        'http://book.zongheng.com/store/c0/c0/b9/u0/p1/v9/s9/t0/ALL.html'
    ]

    request_config = {'RETRIES': 8, 'DELAY': 0, 'TIMEOUT': 3}
    concurrency = 60
    motor_db = MotorBase(loop=loop).get_db()

    async def parse(self, res):
        """Save every listed novel that has a URL and print a summary.

        :param res: response object exposing ``html``
        """
        items_data = await ZHNovelsItem.get_items(html=res.html)
        tasks = []
        for item in items_data:
            if not item.novel_url:
                continue
            res_dic = dict(
                novel_url=item.novel_url,
                novel_name=item.novel_name,
                novel_author=item.novel_author,
                novel_author_home_url=item.novel_author_home_url,
                novel_type=item.novel_type,
                novel_cover=item.novel_cover,
                novel_abstract=item.novel_abstract,
                novel_latest_chapter=item.novel_latest_chapter,
                spider='zongheng',
                updated_at=time.strftime("%Y-%m-%d %X", time.localtime()),
            )
            tasks.append(asyncio.ensure_future(self.save(res_dic)))

        good_nums = 0
        if tasks:
            finished, _unfinished = await asyncio.wait(tasks)
            # save() reports success as True.
            good_nums = sum(1 for task in finished if task.result())
        print(f"共{len(tasks)}本小说,抓取成功{good_nums}本")

    async def save(self, res_dic):
        """Upsert one novel record, keyed by its URL and name.

        :param res_dic: the record to store
        :return: ``True`` on success, ``False`` when an error was logged
        """
        try:
            selector = {
                'novel_url': res_dic['novel_url'],
                'novel_name': res_dic['novel_name'],
            }
            await self.motor_db.all_novels.update_one(
                selector, {'$set': res_dic}, upsert=True)
            print(res_dic['novel_name'] + ' - 抓取成功')
        except Exception as e:
            self.logger.exception(e)
            return False
        return True
Exemplo n.º 13
0
async def cache_others_search_ranking(spider='qidian', novel_type='全部类别'):
    """Fetch one stored ranking from ``novels_ranking``.

    :param spider: value matched against the ``spider`` field
    :param novel_type: value matched against the ``type`` field
    :return: whatever ``find_one`` yields for that pair, restricted to the
        ``data`` field
    """
    motor_db = MotorBase().get_db()
    selector = {'spider': spider, 'type': novel_type}
    only_data = {'data': 1, '_id': 0}
    return await motor_db.novels_ranking.find_one(selector, only_data)
Exemplo n.º 14
0
 async def save(self, **kwargs):
     """Upsert a novel-info record into ``all_novels_info``.

     The record is read from the ``res_dic`` keyword argument and keyed by
     its ``novel_name`` plus the ``'heiyan'`` spider tag. Any error is
     logged, not raised.
     """
     res_dic = kwargs.get('res_dic')
     try:
         motor_db = MotorBase().get_db()
         selector = {'novel_name': res_dic['novel_name'], 'spider': 'heiyan'}
         await motor_db.all_novels_info.update_one(
             selector, {'$set': res_dic}, upsert=True)
     except Exception as e:
         self.logger.exception(e)
Exemplo n.º 15
0
async def cache_owllook_search_ranking():
    """Return up to 35 search keywords with a count of at least 50.

    Keywords are ordered by descending count.

    :return: list of ``{'keyword', 'count', 'index'}`` dicts, ``index``
        starting at 1
    """
    motor_db = MotorBase().get_db()
    selector = {'count': {'$gte': 50}}
    projection = {'keyword': 1, 'count': 1, '_id': 0}
    keyword_cursor = motor_db.search_records.find(selector, projection)
    keyword_cursor = keyword_cursor.sort('count', -1).limit(35)

    ranking = []
    async for document in keyword_cursor:
        ranking.append({
            'keyword': document['keyword'],
            'count': document['count'],
            # 1-based position in the ranking.
            'index': len(ranking) + 1,
        })
    return ranking
Exemplo n.º 16
0
async def get_user_tag():
    """Compute similar users and book recommendations for every user.

    Loads each user's ``data`` from ``user_tag``, compares the ``user_tag``
    lists through ``CosineSimilarity``, scores every book held by a similar
    user with the summed similarity rates, and upserts the similar users, the
    user's tags and the 20 best-scored books into ``user_recommend``.
    Progress is printed along the way.
    """
    motor_db = MotorBase().get_db()
    user_tag_cursor = motor_db.user_tag.find({}, {
        'data': 1,
        'user': 1,
        '_id': 0
    })
    result = {}
    user_book_dict = {}

    async for document in user_tag_cursor:
        if document['data']:
            # Dots in the user name are swapped for a middle dot.
            name = document['user'].replace('.', '·')
            user_book_dict[name] = document['data']
            result[name] = document['data']['user_tag']

    for key, value in result.items():
        if not value:
            continue
        # NOTE(review): this pair of prints was reconstructed from a line
        # that arrived garbled; confirm the wording against the real source.
        print("\nUser:", key)
        print("User tags:", set(value))
        print("User books:", user_book_dict[key]["user_novels"])
        # Everyone except the current user, on an independent copy.
        result_copy = deepcopy(result)
        del result_copy[key]
        cos = CosineSimilarity(value, result_copy)
        vector = cos.create_vector()
        similar_users = cos.calculate(vector)
        print("相似用户:")
        pprint(similar_users[:10])
        book_scores = collections.defaultdict(float)
        for user_rates in similar_users:
            for simuser, simrate in user_rates.items():
                for book in user_book_dict[simuser]["user_novels"]:
                    book_scores[book] += simrate
        # Recommend 20 books; books the user already has are not removed.
        recommend = sorted(book_scores.items(), key=itemgetter(1),
                           reverse=True)[0:20]
        print("书籍推荐:")
        pprint(recommend)
        recommend_novels = [book for book, simrate in recommend]
        await motor_db.user_recommend.update_one({"user": key}, {
            '$set': {
                'similar_user': similar_users,
                'user_tag': result[key],
                'recommend_novels': recommend_novels,
                "updated_at": get_time()
            }
        }, upsert=True)
Exemplo n.º 17
0
async def cache_owllook_search_ranking():
    """Return up to 25 search keywords with a count of at least 50.

    Keywords are ordered by descending count.

    :return: list of keyword values
    """
    motor_db = MotorBase().db
    selector = {'count': {'$gte': 50}}
    projection = {'keyword': 1, '_id': 0}
    keyword_cursor = motor_db.search_records.find(selector, projection)
    keyword_cursor = keyword_cursor.sort('count', -1).limit(25)
    return [document['keyword'] async for document in keyword_cursor]
Exemplo n.º 18
0
class QidianNovelsSpider(Spider):
    """Spider that stores the novels of a listing page in ``all_novels``."""

    # start_urls = ['https://www.qidian.com/all?page=1']

    request_config = {'RETRIES': 10, 'DELAY': 0, 'TIMEOUT': 3}
    concurrency = 100
    motor_db = MotorBase(loop=loop).get_db()

    async def parse(self, res):
        """Save every listed novel and print a summary.

        :param res: response object exposing ``html``
        """
        items_data = await QidianNovelsItem.get_items(html=res.html)
        tasks = []
        for item in items_data:
            res_dic = dict(
                novel_url=item.novel_url,
                novel_name=item.novel_name,
                novel_author=item.novel_author,
                novel_author_home_url=item.novel_author_home_url,
                novel_type=item.novel_type,
                novel_cover=item.novel_cover,
                novel_abstract=item.novel_abstract,
                spider='qidian',
                updated_at=time.strftime("%Y-%m-%d %X", time.localtime()),
            )
            tasks.append(asyncio.ensure_future(self.save(res_dic)))

        good_nums = 0
        if tasks:
            finished, _unfinished = await asyncio.wait(tasks)
            # save() reports success as True.
            good_nums = sum(1 for task in finished if task.result())
        print(f"共{len(tasks)}本小说,抓取成功{good_nums}本")

    async def save(self, res_dic):
        """Upsert one novel record, keyed by its URL and name.

        :param res_dic: the record to store
        :return: ``True`` on success, ``False`` when an error was logged
        """
        try:
            selector = {
                'novel_url': res_dic['novel_url'],
                'novel_name': res_dic['novel_name'],
            }
            await self.motor_db.all_novels.update_one(
                selector, {'$set': res_dic}, upsert=True)
            print(res_dic['novel_name'] + ' - 抓取成功')
        except Exception as e:
            self.logger.exception(e)
            return False
        return True
Exemplo n.º 19
0
 async def save(self, **kwargs):
     """Upsert a ranking record into ``novels_ranking``.

     The record is read from the ``res_dic`` keyword argument and keyed by
     its ``target_url``; ``data``, ``spider`` and a ``finished_at`` local
     timestamp are written. Any error is logged, not raised.
     """
     res_dic = kwargs.get('res_dic')
     try:
         motor_db = MotorBase().db
         selector = {'target_url': res_dic['target_url']}
         fields = {
             'data': res_dic['data'],
             'spider': res_dic['spider'],
             'finished_at': time.strftime("%Y-%m-%d %X", time.localtime()),
         }
         await motor_db.novels_ranking.update_one(
             selector, {'$set': fields}, upsert=True)
     except Exception as e:
         self.logger.exception(e)
Exemplo n.º 20
0
async def bookmarks(request):
    """Render the bookmarks page for the user stored in the session.

    Redirects to ``/`` when no user is in the session or when anything fails.
    Each stored bookmark URL is split into its ``novels_name``, ``name`` and
    ``chapter_url`` query values; bookmarks are handed to the template in
    reverse stored order.
    """
    user = request['session'].get('user', None)
    if not user:
        return redirect('/')

    def first_value(query, key):
        # First value of a query parameter, or '' when it is absent.
        values = query.get(key, '')
        return values[0] if values else ''

    motor_db = MotorBase().db
    try:
        data = await motor_db.user_message.find_one({'user': user})
        # All bookmarks of this user, if any.
        saved = data.get('bookmarks', None) if data else None
        if not saved:
            return template('admin_bookmarks.html',
                            title='{user}的书签 - owllook'.format(user=user),
                            is_login=1,
                            user=user,
                            is_bookmark=0)

        result = []
        for entry in saved:
            bookmark = entry.get('bookmark', None)
            query = parse_qs(urlparse(bookmark).query)
            result.append({
                'novels_name': first_value(query, 'novels_name'),
                'chapter_name': first_value(query, 'name'),
                'chapter_url': first_value(query, 'chapter_url'),
                'bookmark': bookmark,
                'add_time': entry.get('add_time', ''),
            })
        return template(
            'admin_bookmarks.html',
            title='{user}的书签 - owllook'.format(user=user),
            is_login=1,
            user=user,
            is_bookmark=1,
            result=result[::-1])
    except Exception as e:
        LOGGER.error(e)
        return redirect('/')
Exemplo n.º 21
0
    async def parse(self, res):
        """Extract the novel fields from ``res.html`` and hand them to ``save``.

        A database handle bound to ``self.loop`` is stored on
        ``self.motor_db`` first.

        :param res: response object exposing ``html`` and ``url``
        """
        self.motor_db = MotorBase(loop=self.loop).get_db()
        item = await HYNovelInfoItem.get_item(html=res.html)

        item_data = dict(
            novel_name=item.novel_name,
            author=item.author,
            cover=item.cover,
            abstract=item.abstract,
            status=item.status,
            novels_type=item.novels_type,
            novel_chapter_url=item.novel_chapter_url,
            latest_chapter=item.latest_chapter,
            latest_chapter_time=item.latest_chapter_time,
            spider='heiyan',
            target_url=res.url,
            updated_at=time.strftime("%Y-%m-%d %X", time.localtime()),
        )

        print('获取 {} 小说信息成功'.format(item_data['novel_name']))
        await self.save(res_dic=item_data)
Exemplo n.º 22
0
async def owllook_register(request):
    """
    Register a new user; user names must be unique.

    :param request: request whose form-encoded body carries ``user``,
        ``pwd``, ``email`` and ``answer``; the ``reg_index`` cookie selects
        the question the answer is checked against
    :return: JSON ``status``
        :   -2  the answer to the question is wrong
        :   -1  the user name already exists
        :   0   a required field (or the reg_index cookie) is missing
        :   1   registration succeeded
    """
    register_data = parse_qs(str(request.body, encoding='utf-8'))
    user = register_data.get('user', [None])[0]
    pwd = register_data.get('pwd', [None])[0]
    email = register_data.get('email', [None])[0]
    answer = register_data.get('answer', [None])[0]
    # A missing cookie is treated like any other missing field instead of
    # raising KeyError.
    reg_index = request.cookies.get('reg_index')
    if not (user and pwd and email and answer and reg_index):
        return json({'status': 0})

    motor_db = MotorBase().db
    is_exist = await motor_db.user.find_one({'user': user})
    if is_exist:
        return json({'status': -1})

    # Check the answer to the registration question.
    real_answer = get_real_answer(str(reg_index))
    if not (real_answer and real_answer == answer):
        return json({'status': -2})

    # NOTE(review): double MD5 with a site-wide token is a weak password
    # hash. It is left unchanged because the login check must produce the
    # same digest for accounts that already exist.
    pass_first = hashlib.md5((WEBSITE["TOKEN"] + pwd).encode("utf-8")).hexdigest()
    password = hashlib.md5(pass_first.encode("utf-8")).hexdigest()
    data = {
        "user": user,
        "password": password,
        "email": email,
        "register_time": get_time(),
    }
    # Collection.save() no longer exists in PyMongo 4; the document has no
    # _id, so insert_one() stores it the same way.
    await motor_db.user.insert_one(data)
    return json({'status': 1})
Exemplo n.º 23
0
async def owllook_login(request):
    """
    Log a user in.

    :param request: request whose form-encoded body carries ``user`` and
        ``pwd``
    :return: JSON ``status``
        :   -2  the password does not match
        :   -1  no such user
        :   0   user name or password missing
        :   1   login succeeded (the ``owl_sid`` cookie is set)
    """
    login_data = parse_qs(str(request.body, encoding='utf-8'))
    user = login_data.get('user', [None])[0]
    pwd = login_data.get('pwd', [None])[0]
    if not (user and pwd):
        return json({'status': 0})

    motor_db = MotorBase().db
    data = await motor_db.user.find_one({'user': user})
    if not data:
        return json({'status': -1})

    pass_first = hashlib.md5((WEBSITE["TOKEN"] + pwd).encode("utf-8")).hexdigest()
    password = hashlib.md5(pass_first.encode("utf-8")).hexdigest()
    if password != data.get('password'):
        return json({'status': -2})

    response = json({'status': 1})
    # Keep the session id in a cookie for 30 days.
    date = datetime.datetime.now()
    sid_cookie = response.cookies
    sid_cookie['owl_sid'] = request['session'].sid
    sid_cookie['owl_sid']['expires'] = date + datetime.timedelta(days=30)
    sid_cookie['owl_sid']['httponly'] = True
    # Record the user in the server-side session.
    request['session']['user'] = user
    return response
Exemplo n.º 24
0
async def update_all_books():
    """Refresh the latest-chapter data for every book on any user's shelf.

    Each distinct ``book_url`` found in ``user_message`` is passed once to
    ``get_the_latest_chapter``.

    :return: ``True`` when the whole scan finished, ``False`` as soon as
        anything raised (the error is logged)
    """
    try:
        motor_db = MotorBase().db
        # Cursor over every user's shelf links; only book_url is projected.
        projection = {'books_url.book_url': 1, '_id': 0}
        books_url_cursor = motor_db.user_message.find({}, projection)
        # URLs already refreshed in this run.
        seen = set()
        async for document in books_url_cursor:
            if not document:
                continue
            for shelf_entry in document['books_url']:
                chapter_url = shelf_entry['book_url']
                if chapter_url in seen:
                    continue
                await get_the_latest_chapter(chapter_url)
                seen.add(chapter_url)
    except Exception as e:
        LOGGER.exception(e)
        return False
    return True
Exemplo n.º 25
0
def setup_db(operate_bp, loop):
    """Create the module-wide ``MotorBase`` instance.

    Rebinds the module-level name ``motor_base`` to a new ``MotorBase()``.

    :param operate_bp: not used by this function
    :param loop: not used by this function
    """
    # NOTE(review): the (operate_bp, loop) signature looks like a framework
    # start-up hook - confirm where this is registered.
    global motor_base
    motor_base = MotorBase()
Exemplo n.º 26
0
def _latest_chapter_from_meta(soup, meta_value, base_url):
    """Read the latest chapter name and URL from ``<meta property=...>`` tags.

    Returns a ``(name, url)`` tuple; either element is ``None`` when the
    corresponding meta tag is absent.
    """
    name_tags = soup.select('meta[property="{0}"]'.format(
        meta_value["latest_chapter_name"]))
    chapter_name = name_tags[0].get('content', None) if name_tags else None
    url_tags = soup.select('meta[property="{0}"]'.format(
        meta_value["latest_chapter_url"]))
    # The meta content may be relative, so resolve it against the page URL.
    chapter_link = urljoin(
        base_url, url_tags[0].get('content', None)) if url_tags else None
    return chapter_name, chapter_link


def _latest_chapter_from_selector(soup, selector):
    """Read the latest chapter name and URL from the first tag matching ``selector``.

    The tag is located by ``id``, then ``class``, then ``tag`` (a CSS
    selector), whichever the rule provides first. Returns a ``(name, url)``
    tuple; ``(None, None)`` when nothing matches.
    """
    content_url = selector.get('content_url')
    if selector.get('id', None):
        matches = soup.find_all(id=selector['id'])
    elif selector.get('class', None):
        matches = soup.find_all(class_=selector['class'])
    else:
        matches = soup.select(selector.get('tag'))
    if not matches:
        return None, None
    first_match = matches[0]
    chapter_link = None
    # content_url values '1' and '0' are reserved modes that are not
    # implemented yet (TODO); they leave the URL unset.
    if content_url not in ('1', '0'):
        href = first_match.get('href', None)
        # A missing prefix or href cannot be concatenated; leave the URL unset
        # instead of raising TypeError into the caller's catch-all handler.
        if content_url is not None and href is not None:
            chapter_link = content_url + href
    return first_match.get('title', None), chapter_link


async def get_the_latest_chapter(chapter_url, loop=None, timeout=15):
    """Fetch a novel's chapter-list page and store its latest chapter.

    Args:
        chapter_url: URL whose query string carries ``url`` (the source
            site's chapter-list page) and ``novels_name``.
        loop: Event loop handed to ``aiohttp.ClientSession``.
        timeout: Seconds allowed for the whole operation; also passed to the
            individual fetch helpers.

    Returns:
        The ``data`` dict that was upserted into ``latest_chapter``, or
        ``None`` when the query parameters are missing, the site has no entry
        in ``LATEST_RULES``, the chapter could not be extracted, or any
        exception (including a timeout) occurred. Exceptions are logged.
    """
    try:
        with async_timeout.timeout(timeout):
            query = parse_qs(urlparse(chapter_url).query)
            url = query.get('url', '')
            novels_name = query.get('novels_name', '')
            if not (url and novels_name):
                return None
            # parse_qs yields lists; only the first value of each is used.
            url = url[0]
            novels_name = novels_name[0]
            netloc = urlparse(url).netloc
            if netloc not in LATEST_RULES:
                return None
            rule = LATEST_RULES[netloc]
            async with aiohttp.ClientSession(loop=loop) as client:
                headers = {'user-agent': await get_random_user_agent()}
                try:
                    html = await target_fetch(client=client,
                                              url=url,
                                              headers=headers,
                                              timeout=timeout)
                    if html is None:
                        # NOTE(review): get_html_by_requests is called without
                        # await; if it does blocking I/O it stalls the event
                        # loop. Confirm and consider an executor.
                        html = get_html_by_requests(url=url,
                                                    headers=headers,
                                                    timeout=timeout)
                except TypeError:
                    html = get_html_by_requests(url=url,
                                                headers=headers,
                                                timeout=timeout)
                except Exception as e:
                    LOGGER.exception(e)
                    return None
                try:
                    soup = BeautifulSoup(html, 'html5lib')
                except Exception as e:
                    LOGGER.exception(e)
                    return None
                if rule.plan:
                    latest_chapter_name, latest_chapter_url = \
                        _latest_chapter_from_meta(soup, rule.meta_value, url)
                else:
                    latest_chapter_name, latest_chapter_url = \
                        _latest_chapter_from_selector(soup, rule.selector)
                if not (latest_chapter_name and latest_chapter_url):
                    return None
                time_current = get_time()
                data = {
                    "latest_chapter_name":
                    latest_chapter_name,
                    "latest_chapter_url":
                    latest_chapter_url,
                    "owllook_chapter_url":
                    chapter_url,
                    "owllook_content_url":
                    "/owllook_content?url={latest_chapter_url}&name={name}&chapter_url={chapter_url}&novels_name={novels_name}"
                    .format(
                        latest_chapter_url=latest_chapter_url,
                        name=latest_chapter_name,
                        chapter_url=url,
                        novels_name=novels_name,
                    ),
                }
                # Persist the latest chapter, keyed by novel name and
                # chapter-list URL.
                motor_db = MotorBase().get_db()
                await motor_db.latest_chapter.update_one(
                    {
                        "novels_name": novels_name,
                        'owllook_chapter_url': chapter_url
                    }, {
                        '$set': {
                            'data': data,
                            "finished_at": time_current
                        }
                    },
                    upsert=True)
                return data
    except Exception as e:
        LOGGER.exception(e)
        return None
Exemplo n.º 27
0
async def owllook_search(request):
    """Handle a novel search request and render the result page.

    Reads the keyword from the ``wd`` query argument, records it in
    ``search_records``, queries the engines listed in ``ENGINE_PRIORITY``
    in order until one returns results, and renders ``result.html``.
    For a logged-in user the keyword is also recorded in that user's
    ``user_message.search_records`` list.

    Returns:
        A redirect to ``/`` when the keyword is empty, the rendered
        ``result.html`` template when results exist, otherwise a plain
        "no result" HTML response.
    """
    start = time.time()
    name = request.args.get('wd', '').strip()
    if not name:
        return redirect('/')
    motor_db = MotorBase().db
    # Record the keyword in the site-wide statistics; failure is non-fatal.
    try:
        await motor_db.search_records.update_one({'keyword': name}, {'$inc': {'count': 1}}, upsert=True)
    except Exception as e:
        LOGGER.exception(e)
    # Ask each search engine in priority order; stop at the first with results.
    parse_result = None
    for each_engine in ENGINE_PRIORITY:
        if each_engine == "360":
            novels_name = "{name} 小说 最新章节".format(name=name)
            parse_result = await cache_owllook_so_novels_result(novels_name)
        elif each_engine == "baidu":
            # A keyword containing ':baidu' is passed to Baidu verbatim
            # (the text after 'baidu') instead of the intitle: template.
            novels_name = 'intitle:{name} 小说 阅读'.format(name=name) if ':baidu' not in name else name.split('baidu')[1]
            parse_result = await cache_owllook_baidu_novels_result(novels_name)
        if parse_result:
            break
    if not parse_result:
        return html("No Result!请将小说名反馈给本站,谢谢!")

    # Parsed sources first, then most recently updated; raw ':baidu'
    # queries keep the engine's own ordering.
    if ':baidu' not in name:
        result_sorted = sorted(
            parse_result, reverse=True,
            key=itemgetter('is_parse', 'timestamp'))
    else:
        result_sorted = parse_result

    user = request['session'].get('user', None)
    if not user:
        return template(
            'result.html',
            is_login=0,
            name=name,
            time='%.2f' % (time.time() - start),
            result=result_sorted,
            count=len(parse_result))

    try:
        time_current = get_time()
        # The upsert guarantees the user's document exists for the updates below.
        await motor_db.user_message.update_one({'user': user},
                                               {'$set': {'last_update_time': time_current}},
                                               upsert=True)
        # First try to add the keyword as a new entry with counts=1 ...
        pushed = await motor_db.user_message.update_one(
            {'user': user, 'search_records.keyword': {'$ne': name}},
            {'$push': {'search_records': {'keyword': name, 'counts': 1}}},
        )
        # ... and only bump the counter when the keyword was already there.
        # An UpdateResult is always truthy, so testing the object itself
        # would increment a freshly pushed entry to 2 on its first search.
        if not pushed.matched_count:
            await motor_db.user_message.update_one(
                {'user': user, 'search_records.keyword': name},
                {'$inc': {'search_records.$.counts': 1}}
            )
    except Exception as e:
        LOGGER.exception(e)
    return template(
        'result.html',
        is_login=1,
        user=user,
        name=name,
        time='%.2f' % (time.time() - start),
        result=result_sorted,
        count=len(parse_result))
Exemplo n.º 28
0
def setup_db(novels_bp, loop):
    """Create a ``MotorBase`` instance and bind it to the module-level ``motor_base``.

    Neither ``novels_bp`` nor ``loop`` is used by the body.
    NOTE(review): the signature looks like a blueprint start-up listener hook;
    confirm where this function is registered.
    """
    global motor_base
    motor_base = MotorBase()
Exemplo n.º 29
0
def setup_db(admin_bp, loop):
    """Create a ``MotorBase`` instance and bind it to the module-level ``motor_base``.

    Neither ``admin_bp`` nor ``loop`` is used by the body.
    NOTE(review): the signature looks like a blueprint start-up listener hook;
    confirm where this function is registered.
    """
    global motor_base
    motor_base = MotorBase()
Exemplo n.º 30
0
def setup_db(rank_bp, loop):
    """Create a ``MotorBase`` instance and bind it to the module-level ``motor_base``.

    Neither ``rank_bp`` nor ``loop`` is used by the body.
    NOTE(review): the signature looks like a blueprint start-up listener hook;
    confirm where this function is registered.
    """
    global motor_base
    motor_base = MotorBase()