Exemplo n.º 1
0
    def _Magazine(self, **kwargs):
        """Build an OPDS acquisition feed listing the issues of one magazine.

        kwargs:
            magid: magazine title (required); selects the issues to list.
            index: pagination offset into the result set (optional, default 0).

        Stores the feed dict in self.data, or an error message if no
        magid was supplied.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'magid' not in kwargs:
            self.data = self._error_with_message('No Magazine Provided')
            return
        links = []
        entries = []
        title = ''
        # magid comes from the request, so bind it as a query parameter
        # rather than interpolating it into the SQL text: avoids SQL
        # injection and breakage on titles containing quotes.
        cmd = "SELECT Title,IssueID,IssueDate,IssueAcquired,IssueFile from issues "
        cmd += "WHERE Title=? order by IssueDate DESC"
        results = myDB.select(cmd, (kwargs['magid'],))
        page = results[index:(index + self.PAGE_SIZE)]
        for issue in page:
            title = makeUnicode(issue['Title'])
            entry = {'title': escape('%s (%s)' % (title, issue['IssueDate'])),
                     'id': escape('issue:%s' % issue['IssueID']),
                     'updated': opdstime(issue['IssueAcquired']),
                     'content': escape('%s - %s' % (title, issue['IssueDate'])),
                     'href': '%s?cmd=Serve&issueid=%s' % (self.opdsroot, quote_plus(issue['IssueID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mimeType(issue['IssueFile'])}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                # Look for a cached cover image named after the issue file
                fname = os.path.splitext(issue['IssueFile'])[0]
                res = cache_img('magazine', issue['IssueID'], fname + '.jpg')
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)

        feed = {}
        # title holds the last issue's title ('' if this page had no issues)
        title = '%s (%s)' % (escape(title), len(entries))
        feed['title'] = 'LazyLibrarian OPDS - %s' % title
        feed['id'] = 'magazine:%s' % escape(kwargs['magid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Magazine&magid=%s' % (self.opdsroot, quote_plus(kwargs['magid'])),
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        # Forward pagination link only when more results remain
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Magazine&magid=%s&index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                            index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        # Backward pagination link only when not on the first page
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Magazine&magid=%s&index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                            index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Exemplo n.º 2
0
    def _Magazine(self, **kwargs):
        """Build an OPDS acquisition feed listing the issues of one magazine.

        kwargs:
            magid: magazine title (required); selects the issues to list.
            index: pagination offset into the result set (optional, default 0).

        Stores the feed dict in self.data, or an error message if no
        magid was supplied.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'magid' not in kwargs:
            self.data = self._error_with_message('No Magazine Provided')
            return
        links = []
        entries = []
        title = ''
        cmd = "SELECT Title,IssueID,IssueDate,IssueAcquired,IssueFile from issues "
        cmd += "WHERE Title='%s' order by IssueDate DESC"
        # NOTE(review): magid is interpolated straight into the SQL text;
        # a title containing a quote breaks the query and this is open to
        # SQL injection -- a parameterized query (Title=?) would be safer.
        results = myDB.select(cmd % kwargs['magid'])
        page = results[index:(index + self.PAGE_SIZE)]
        for issue in page:
            title = makeUnicode(issue['Title'])
            entry = {'title': escape('%s (%s)' % (title, issue['IssueDate'])),
                     'id': escape('issue:%s' % issue['IssueID']),
                     'updated': opdstime(issue['IssueAcquired']),
                     'content': escape('%s - %s' % (title, issue['IssueDate'])),
                     'href': '%s?cmd=Serve&issueid=%s' % (self.opdsroot, quote_plus(issue['IssueID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mimeType(issue['IssueFile'])}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                # Look for a cached cover image named after the issue file
                fname = os.path.splitext(issue['IssueFile'])[0]
                res = cache_img('magazine', issue['IssueID'], fname + '.jpg')
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)

        feed = {}
        # title holds the last issue's title ('' if this page had no issues)
        title = '%s (%s)' % (escape(title), len(entries))
        feed['title'] = 'LazyLibrarian OPDS - %s' % title
        feed['id'] = 'magazine:%s' % escape(kwargs['magid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Magazine&magid=%s' % (self.opdsroot, quote_plus(kwargs['magid'])),
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        # Forward pagination link only when more results remain
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Magazine&magid=%s&index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                            index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        # Backward pagination link only when not on the first page
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Magazine&magid=%s&index=%s' % (self.opdsroot, quote_plus(kwargs['magid']),
                                                                            index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Exemplo n.º 3
0
def url_fix(s, charset='utf-8'):
    """Return *s* as a safely-quoted URL string.

    Coerces the value to the native text type for the running Python
    version, then re-quotes the path and query components so the URL is
    safe to pass on.
    """
    # Normalise to the representation urlsplit expects on this interpreter.
    if PY2 and isinstance(s, text_type):
        s = s.encode(charset, 'ignore')
    elif not PY2 and not isinstance(s, text_type):
        s = s.decode(charset)
    parts = urlsplit(s)
    # '/' and '%' stay literal in the path; ':', '&', '=' stay literal in the query.
    safe_path = quote(parts.path, '/%')
    safe_query = quote_plus(parts.query, ':&=')
    return urlunsplit((parts.scheme, parts.netloc, safe_path, safe_query, parts.fragment))
Exemplo n.º 4
0
def url_fix(s, charset='utf-8'):
    """Return *s* as a safely-quoted URL string.

    Coerces the value to the native text type for the running Python
    version, then re-quotes the path and query components so the URL is
    safe to pass on.
    """
    # Normalise to the representation urlsplit expects on this interpreter.
    if PY2 and isinstance(s, text_type):
        s = s.encode(charset, 'ignore')
    elif PY3 and not isinstance(s, text_type):
        s = s.decode(charset)
    scheme, netloc, path, qs, anchor = urlsplit(s)
    # '/' and '%' stay literal in the path; ':', '&', '=' stay literal in the query.
    path = quote(path, '/%')
    qs = quote_plus(qs, ':&=')
    return urlunsplit((scheme, netloc, path, qs, anchor))
Exemplo n.º 5
0
def get_book_desc(isbn=None, author=None, title=None):
    """ GoodReads does not always have a book description in its api results
        due to restrictive TOS from some of its providers.
        Try to get missing descriptions from googlebooks
        Return description, empty string if not found, None if error"""
    if not author or not title:
        return ''

    author = cleanName(author)
    title = cleanName(title)
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        baseurl = 'https://www.googleapis.com/books/v1/volumes?q='

        urls = [
            baseurl + quote_plus('inauthor:%s intitle:%s' % (author, title))
        ]
        # Prefer an exact isbn match when we have one
        if isbn:
            urls.insert(0, baseurl + quote_plus('isbn:' + isbn))

        for url in urls:
            if lazylibrarian.CONFIG['GB_API']:
                url += '&key=' + lazylibrarian.CONFIG['GB_API']
            # BUGFIX: was len(CONFIG['GB_COUNTRY'] == 2) which calls len()
            # on a bool and raises TypeError whenever GB_COUNTRY is set.
            # Intent is a two-letter country code check.
            if lazylibrarian.CONFIG['GB_COUNTRY'] and len(
                    lazylibrarian.CONFIG['GB_COUNTRY']) == 2:
                url += '&country=' + lazylibrarian.CONFIG['GB_COUNTRY']
            results, cached = gb_json_request(url)
            if results is None:  # there was an error
                return None
            if results and not cached:
                # rate-limit fresh requests to googlebooks
                time.sleep(1)
            if results and 'items' in results:
                for item in results['items']:
                    # noinspection PyBroadException
                    try:
                        auth = item['volumeInfo']['authors'][0]
                        book = item['volumeInfo']['title']
                        desc = item['volumeInfo']['description']
                        # fuzzy-match to make sure this result is our book
                        book_fuzz = fuzz.token_set_ratio(book, title)
                        auth_fuzz = fuzz.token_set_ratio(auth, author)
                        if book_fuzz > 98 and auth_fuzz > 80:
                            return desc
                    except Exception:
                        # item missing authors/title/description: skip it
                        pass
    return ''
Exemplo n.º 6
0
def getAuthorImage(authorid=None):
    """Return a cache-relative link to an image for the given author,
    fetching one from a google image search if not already cached.
    Returns None on error or if no image could be found."""
    # tbm=isch      search images
    # tbs=ift:jpg  jpeg file type
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    cachedir = lazylibrarian.CACHEDIR
    coverfile = os.path.join(cachedir, "author", authorid + '.jpg')

    if os.path.isfile(coverfile):  # use cached image if there is one
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug("getAuthorImage: Returning Cached response for %s" %
                     coverfile)
        coverlink = 'cache/author/' + authorid + '.jpg'
        return coverlink

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    author = myDB.match('select AuthorName from authors where AuthorID=?',
                        (authorid, ))
    if author:
        authorname = safe_unicode(author['AuthorName'])
        if PY2:
            authorname = authorname.encode(lazylibrarian.SYS_ENCODING)
        safeparams = quote_plus("author %s" % authorname)
        # NOTE(review): the trailing + 'author' appends the literal word
        # "author" to the already-quoted query ("author <name>author") --
        # looks unintentional; confirm against the search results.
        URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg,itp:face&as_q=" + safeparams + 'author'
        result, success = fetchURL(URL)
        if success:
            # Scrape the first image URL out of the results page.
            # NOTE(review): fragile -- depends on google's current markup.
            try:
                img = result.split('url?q=')[1].split('">')[1].split(
                    'src="')[1].split('"')[0]
            except IndexError:
                img = None
            if img and img.startswith('http'):
                coverlink, success, was_in_cache = cache_img(
                    "author", authorid, img)
                if success:
                    if was_in_cache:
                        logger.debug("Returning cached google image for %s" %
                                     authorname)
                    else:
                        logger.debug("Cached google image for %s" % authorname)
                    return coverlink
                else:
                    logger.debug("Error getting google image %s, [%s]" %
                                 (img, coverlink))
            else:
                logger.debug("No image found in google page for %s" %
                             authorname)
        else:
            logger.debug("Error getting google page for %s, [%s]" %
                         (safeparams, result))
    else:
        logger.debug("No author found for %s" % authorid)
    return None
Exemplo n.º 7
0
    def _RecentAudio(self, **kwargs):
        """Build an OPDS acquisition feed of recently-added audiobooks.

        kwargs:
            index: pagination offset into the result set (optional, default 0).
            query: optional substring filter on book name.

        Stores the feed dict in self.data.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent AudioBooks', 'id': 'Recent AudioBooks', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentAudio' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))

        cmd = "select BookName,BookID,AudioLibrary,BookDate,BookImg,BookDesc,BookAdded,AuthorID from books WHERE "
        args = []
        if 'query' in kwargs:
            # Bind the user-supplied search text as a parameter instead of
            # splicing it into the SQL (avoids SQL injection / quote breakage).
            cmd += "BookName LIKE ? AND "
            args.append('%' + kwargs['query'] + '%')
        cmd += "AudioStatus='Open' order by AudioLibrary DESC"
        results = myDB.select(cmd, tuple(args)) if args else myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            title = makeUnicode(book['BookName'])
            entry = {'title': escape(title),
                     'id': escape('audio:%s' % book['BookID']),
                     'updated': opdstime(book['AudioLibrary']),
                     'href': '%s?cmd=Serve&audioid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mimeType("we_send.zip")}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                # Parameterized lookup, consistent with the filter above
                author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?", (book['AuthorID'],))
                author = makeUnicode(author['AuthorName'])
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                entry['author'] = escape('%s' % author)
            else:
                entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
            entries.append(entry)

        # Forward pagination link only when more results remain
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentAudio&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        # Backward pagination link only when not on the first page
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentAudio&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s result%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Exemplo n.º 8
0
    def _RecentAudio(self, **kwargs):
        """Build an OPDS acquisition feed of recently-added audiobooks.

        kwargs:
            index: pagination offset into the result set (optional, default 0).
            query: optional substring filter on book name.

        Stores the feed dict in self.data.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent AudioBooks', 'id': 'Recent AudioBooks', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentAudio' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))

        cmd = "select BookName,BookID,AudioLibrary,BookDate,BookImg,BookDesc,BookAdded,AuthorID from books WHERE "
        if 'query' in kwargs:
            # NOTE(review): user-supplied query is concatenated into the SQL;
            # open to SQL injection -- consider a parameterized LIKE clause.
            cmd += "BookName LIKE '%" + kwargs['query'] + "%' AND "
        cmd += "AudioStatus='Open' order by AudioLibrary DESC"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            title = makeUnicode(book['BookName'])
            entry = {'title': escape(title),
                     'id': escape('audio:%s' % book['BookID']),
                     'updated': opdstime(book['AudioLibrary']),
                     'href': '%s?cmd=Serve&audioid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'type': mimeType("we_send.zip")}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                author = myDB.match("SELECT AuthorName from authors WHERE AuthorID='%s'" % book['AuthorID'])
                author = makeUnicode(author['AuthorName'])
                entry['image'] = self.searchroot + '/' + book['BookImg']
                entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                entry['author'] = escape('%s' % author)
            else:
                entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
            entries.append(entry)

        # Forward pagination link only when more results remain
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentAudio&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        # Backward pagination link only when not on the first page
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentAudio&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s result%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Exemplo n.º 9
0
def get_book_desc(isbn=None, author=None, title=None):
    """ GoodReads does not always have a book description in its api results
        due to restrictive TOS from some of its providers.
        Try to get missing descriptions from googlebooks
        Return description, empty string if not found, None if error"""
    if not author or not title:
        return ''

    author = cleanName(author)
    title = cleanName(title)
    if lazylibrarian.CONFIG['BOOK_API'] == 'GoodReads':
        baseurl = 'https://www.googleapis.com/books/v1/volumes?q='

        urls = [baseurl + quote_plus('inauthor:%s intitle:%s' % (author, title))]
        # Prefer an exact isbn match when we have one
        if isbn:
            urls.insert(0, baseurl + quote_plus('isbn:' + isbn))

        for url in urls:
            if lazylibrarian.CONFIG['GB_API']:
                url += '&key=' + lazylibrarian.CONFIG['GB_API']
            # BUGFIX: was len(CONFIG['GB_COUNTRY'] == 2) which calls len()
            # on a bool and raises TypeError whenever GB_COUNTRY is set.
            # Intent is a two-letter country code check.
            if lazylibrarian.CONFIG['GB_COUNTRY'] and len(lazylibrarian.CONFIG['GB_COUNTRY']) == 2:
                url += '&country=' + lazylibrarian.CONFIG['GB_COUNTRY']
            results, cached = gb_json_request(url)
            if results is None:  # there was an error
                return None
            if results and not cached:
                # rate-limit fresh requests to googlebooks
                time.sleep(1)
            if results and 'items' in results:
                for item in results['items']:
                    # noinspection PyBroadException
                    try:
                        auth = item['volumeInfo']['authors'][0]
                        book = item['volumeInfo']['title']
                        desc = item['volumeInfo']['description']
                        # fuzzy-match to make sure this result is our book
                        book_fuzz = fuzz.token_set_ratio(book, title)
                        auth_fuzz = fuzz.token_set_ratio(auth, author)
                        if book_fuzz > 98 and auth_fuzz > 80:
                            return desc
                    except Exception:
                        # item missing authors/title/description: skip it
                        pass
    return ''
Exemplo n.º 10
0
    def _Magazines(self, **kwargs):
        """Build an OPDS navigation feed of magazines that have issues.

        kwargs:
            index: pagination offset into the result set (optional, default 0).
            query: optional substring filter on magazine title.

        Stores the feed dict in self.data.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Magazines', 'id': 'Magazines', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Magazines' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))
        cmd = 'select magazines.*,(select count(*) as counter from issues where magazines.title = issues.title)'
        cmd += ' as Iss_Cnt from magazines '
        args = []
        if 'query' in kwargs:
            # Bind the user-supplied search text as a parameter instead of
            # splicing it into the SQL (avoids SQL injection / quote breakage).
            cmd += "WHERE magazines.title LIKE ? "
            args.append('%' + kwargs['query'] + '%')
        cmd += 'order by magazines.title'
        results = myDB.select(cmd, tuple(args)) if args else myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for mag in page:
            # only list magazines that actually have downloaded issues
            if mag['Iss_Cnt'] > 0:
                title = makeUnicode(mag['Title'])
                entry = {
                    'title': escape('%s (%s)' % (title, mag['Iss_Cnt'])),
                    'id': escape('magazine:%s' % title),
                    'updated': opdstime(mag['LastAcquired']),
                    'content': escape('%s' % title),
                    'href': '%s?cmd=Magazine&magid=%s' % (self.opdsroot, quote_plus(title)),
                    'kind': 'navigation',
                    'rel': 'subsection',
                }
                # disabled cover image as it stops navigation?
                # if lazylibrarian.CONFIG['OPDS_METAINFO']:
                #     res = cache_img('magazine', md5_utf8(mag['LatestCover']), mag['LatestCover'], refresh=True)
                #     entry['image'] = self.searchroot + '/' + res[0]
                entries.append(entry)

        # Forward pagination link only when more results remain
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        # Backward pagination link only when not on the first page
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s magazine%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Exemplo n.º 11
0
    def _RecentMags(self, **kwargs):
        """Build an OPDS acquisition feed of recently-acquired magazine issues.

        kwargs:
            index: pagination offset into the result set (optional, default 0).
            query: optional substring filter on magazine title.

        Stores the feed dict in self.data.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent Magazines', 'id': 'Recent Magazines', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentMags' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))
        cmd = "select Title,IssueID,IssueAcquired,IssueDate,IssueFile from issues "
        cmd += "where IssueFile != '' "
        args = []
        if 'query' in kwargs:
            # Bind the user-supplied search text as a parameter instead of
            # splicing it into the SQL (avoids SQL injection / quote breakage).
            cmd += "AND Title LIKE ? "
            args.append('%' + kwargs['query'] + '%')
        cmd += "order by IssueAcquired DESC"
        results = myDB.select(cmd, tuple(args)) if args else myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for mag in page:
            title = makeUnicode(mag['Title'])
            entry = {'title': escape('%s' % mag['IssueDate']),
                     'id': escape('issue:%s' % mag['IssueID']),
                     'updated': opdstime(mag['IssueAcquired']),
                     'content': escape('%s - %s' % (title, mag['IssueDate'])),
                     'href': '%s?cmd=Serve&issueid=%s' % (self.opdsroot, quote_plus(mag['IssueID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'author': escape(title),
                     'type': mimeType(mag['IssueFile'])}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                # Look for a cached cover image named after the issue file
                fname = os.path.splitext(mag['IssueFile'])[0]
                res = cache_img('magazine', mag['IssueID'], fname + '.jpg')
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)

        # Forward pagination link only when more results remain
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentMags&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        # Backward pagination link only when not on the first page
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentMags&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Exemplo n.º 12
0
    def _Magazines(self, **kwargs):
        """Build an OPDS navigation feed of magazines that have issues.

        kwargs:
            index: pagination offset into the result set (optional, default 0).
            query: optional substring filter on magazine title.

        Stores the feed dict in self.data.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Magazines', 'id': 'Magazines', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Magazines' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))
        cmd = 'select magazines.*,(select count(*) as counter from issues where magazines.title = issues.title)'
        cmd += ' as Iss_Cnt from magazines '
        if 'query' in kwargs:
            # NOTE(review): user-supplied query is concatenated into the SQL;
            # open to SQL injection -- consider a parameterized LIKE clause.
            cmd += "WHERE magazines.title LIKE '%" + kwargs['query'] + "%' "
        cmd += 'order by magazines.title'
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for mag in page:
            # only list magazines that actually have downloaded issues
            if mag['Iss_Cnt'] > 0:
                title = makeUnicode(mag['Title'])
                entry = {
                    'title': escape('%s (%s)' % (title, mag['Iss_Cnt'])),
                    'id': escape('magazine:%s' % title),
                    'updated': opdstime(mag['LastAcquired']),
                    'content': escape('%s' % title),
                    'href': '%s?cmd=Magazine&magid=%s' % (self.opdsroot, quote_plus(title)),
                    'kind': 'navigation',
                    'rel': 'subsection',
                }
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    # refresh the cached copy of the latest cover image
                    res = cache_img('magazine', md5_utf8(mag['LatestCover']), mag['LatestCover'], refresh=True)
                    entry['image'] = self.searchroot + '/' + res[0]
                entries.append(entry)

        # Forward pagination link only when more results remain
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        # Backward pagination link only when not on the first page
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Magazines&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s magazine%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Exemplo n.º 13
0
    def _RecentMags(self, **kwargs):
        """Build an OPDS acquisition feed of recently-acquired magazine issues.

        kwargs:
            index: pagination offset into the result set (optional, default 0).
            query: optional substring filter on magazine title.

        Stores the feed dict in self.data.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent Magazines', 'id': 'Recent Magazines', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentMags' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchmagazines.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Magazines'))
        cmd = "select Title,IssueID,IssueAcquired,IssueDate,IssueFile from issues "
        cmd += "where IssueFile != '' "
        if 'query' in kwargs:
            # NOTE(review): user-supplied query is concatenated into the SQL;
            # open to SQL injection -- consider a parameterized LIKE clause.
            cmd += "AND Title LIKE '%" + kwargs['query'] + "%' "
        cmd += "order by IssueAcquired DESC"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for mag in page:
            title = makeUnicode(mag['Title'])
            entry = {'title': escape('%s' % mag['IssueDate']),
                     'id': escape('issue:%s' % mag['IssueID']),
                     'updated': opdstime(mag['IssueAcquired']),
                     'content': escape('%s - %s' % (title, mag['IssueDate'])),
                     'href': '%s?cmd=Serve&issueid=%s' % (self.opdsroot, quote_plus(mag['IssueID'])),
                     'kind': 'acquisition',
                     'rel': 'file',
                     'author': escape(title),
                     'type': mimeType(mag['IssueFile'])}
            if lazylibrarian.CONFIG['OPDS_METAINFO']:
                # Look for a cached cover image named after the issue file
                fname = os.path.splitext(mag['IssueFile'])[0]
                res = cache_img('magazine', mag['IssueID'], fname + '.jpg')
                entry['image'] = self.searchroot + '/' + res[0]
            entries.append(entry)

        # Forward pagination link only when more results remain
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentMags&index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        # Backward pagination link only when not on the first page
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentMags&index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s issue%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return
Exemplo n.º 14
0
def getAuthorImage(authorid=None):
    """Return a cache-relative link to a jpg portrait for *authorid*.

    Serves the previously cached image when one exists, otherwise scrapes
    the first face-filtered Google image result for the author's name and
    caches that.  Returns None when no image can be obtained.
    """
    # Google query flags: tbm=isch -> image search, tbs=ift:jpg -> jpeg only
    if not authorid:
        logger.error("getAuthorImage: No authorid")
        return None

    coverfile = os.path.join(lazylibrarian.CACHEDIR, "author", authorid + '.jpg')
    if os.path.isfile(coverfile):  # short-circuit on a cache hit
        lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
        logger.debug("getAuthorImage: Returning Cached response for %s" % coverfile)
        return 'cache/author/' + authorid + '.jpg'

    lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
    myDB = database.DBConnection()
    row = myDB.match('select AuthorName from authors where AuthorID=?', (authorid,))
    if not row:
        logger.debug("No author found for %s" % authorid)
        return None

    authorname = safe_unicode(row['AuthorName'])
    if PY2:
        authorname = authorname.encode(lazylibrarian.SYS_ENCODING)
    safeparams = quote_plus("author %s" % authorname)
    URL = "https://www.google.com/search?tbm=isch&tbs=ift:jpg,itp:face&as_q=" + safeparams + 'author'
    result, success = fetchURL(URL)
    if not success:
        logger.debug("Error getting google page for %s, [%s]" % (safeparams, result))
        return None

    # pull the first image url out of the result page markup
    try:
        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
    except IndexError:
        img = None
    if not img or not img.startswith('http'):
        logger.debug("No image found in google page for %s" % authorname)
        return None

    coverlink, cached_ok, was_in_cache = cache_img("author", authorid, img)
    if not cached_ok:
        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
        return None
    if was_in_cache:
        logger.debug("Returning cached google image for %s" % authorname)
    else:
        logger.debug("Cached google image for %s" % authorname)
    return coverlink
Exemplo n.º 15
0
 def multiLink(self, bookfile, bookid):
     """Build OPDS acquisition <link> tags for every available ebook format.

     Checks which configured EBOOK_TYPE extensions exist alongside
     *bookfile*; when more than one format is present, returns their
     concatenated <link> elements, otherwise an empty string.
     """
     base = os.path.splitext(bookfile)[0]
     available = [ext for ext in getList(lazylibrarian.CONFIG['EBOOK_TYPE'])
                  if os.path.isfile(base + '.' + ext)]
     if len(available) <= 1:
         return ''
     parts = []
     for ext in available:
         href = '%s?cmd=Serve&amp;bookid=%s&amp;fmt=%s' % (
             self.opdsroot, quote_plus(bookid), ext)
         parts.append('<link href="' + href +
                      '" rel="http://opds-spec.org/acquisition" type="' +
                      mimeType('.' + ext) + '"/>')
     return ''.join(parts)
Exemplo n.º 16
0
    def _Author(self, **kwargs):
        """OPDS feed of all downloadable (ebook or audiobook) books for one author.

        kwargs: 'authorid' (required), optional 'query' substring filter on
        BookName, optional 'index' for pagination.  Stores the feed dict in
        self.data, or an error document when authorid is missing.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'authorid' not in kwargs:
            self.data = self._error_with_message('No Author Provided')
            return
        links = []
        entries = []
        links.append(
            getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                    ftype='application/opensearchdescription+xml',
                    rel='search',
                    title='Search Books'))
        author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?",
                            (kwargs['authorid'], ))
        author = makeUnicode(author['AuthorName'])
        cmd = "SELECT BookName,BookDate,BookID,BookAdded,BookDesc,BookImg,BookFile,AudioFile from books WHERE "
        params = []
        if 'query' in kwargs:
            # parameterised LIKE instead of string concatenation to avoid SQL injection
            cmd += "BookName LIKE ? AND "
            params.append('%' + kwargs['query'] + '%')
        cmd += "(Status='Open' or AudioStatus='Open') and AuthorID=? order by BookDate DESC"
        params.append(kwargs['authorid'])
        results = myDB.select(cmd, tuple(params))
        page = results[index:(index + self.PAGE_SIZE)]

        for book in page:
            mime_type = None
            rel = 'file'
            if book['BookFile']:
                # multiLink returns ready-made <link> tags when several ebook
                # formats exist; otherwise serve the single file directly
                mime_type = self.multiLink(book['BookFile'], book['BookID'])
                if mime_type:
                    rel = 'multi'
                else:
                    mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])

            if mime_type:
                # hrefs are escaped by the feed serialiser, so use a plain '&'
                # and quote the id, consistent with the other feeds in this file
                entry = {'title': escape('%s (%s)' % (book['BookName'], book['BookDate'])),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookAdded']),
                         'href': '%s?cmd=Serve&bookid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                         'kind': 'acquisition',
                         'rel': rel,
                         'type': mime_type}
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['thumbnail'] = entry['image']
                    entry['content'] = escape('%s - %s' % (book['BookName'], book['BookDesc']))
                    entry['author'] = escape('%s' % author)
                else:
                    entry['content'] = escape('%s (%s)' % (book['BookName'], book['BookAdded']))
                entries.append(entry)

        feed = {}
        authorname = '%s (%s)' % (escape(author), len(entries))
        feed['title'] = 'LazyLibrarian OPDS - %s' % authorname
        feed['id'] = 'author:%s' % escape(kwargs['authorid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Authors' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='self'))
        # pagination links, PAGE_SIZE entries per page
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Author&authorid=%s&index=%s' %
                        (self.opdsroot, quote_plus(kwargs['authorid']), index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                        rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Author&authorid=%s&index=%s' %
                        (self.opdsroot, quote_plus(kwargs['authorid']), index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                        rel='previous'))
        feed['links'] = links
        feed['entries'] = entries
        self.data = feed
        logger.debug("Returning %s book%s" %
                     (len(entries), plural(len(entries))))
        return
Exemplo n.º 17
0
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])  # author count before import, to report the delta
    result = myDB.match('select SeriesName from series where SeriesID=?',
                        (seriesid, ))
    seriesname = result['SeriesName']
    members, api_hits = getSeriesMembers(seriesid, seriesname)
    # strip curly and straight quotes so fuzzy title comparison is reliable
    dic = {
        u'\u2018': "",
        u'\u2019': "",
        u'\u201c': '',
        u'\u201d': '',
        "'": "",
        '"': ''
    }

    if members:
        myDB = database.DBConnection()
        for member in members:
            # member layout: [order, bookname, authorname, workid, authorid, pubyear]
            bookname = member[1]
            authorname = member[2]
            authorid = member[4]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if not in_cache:
                        api_hits += 1
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        # Element.getiterator() was removed in Python 3.9;
                        # iter() is the equivalent and exists since 2.7
                        resultxml = rootxml.iter('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(
                                booktitle, bookname)
                            if book_fuzz >= 98:
                                try:
                                    author = item.find(
                                        './best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                try:
                                    authorid = item.find(
                                        './best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug(
                                    "Author Search found %s %s, authorid %s" %
                                    (author, booktitle, authorid))
                                break
                    if not authorid:  # try again with title only
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(
                                lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(
                            params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if not in_cache:
                            api_hits += 1
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' %
                                        searchname)
                        else:
                            resultxml = rootxml.iter('work')
                            for item in resultxml:
                                # guard the lookups like the author+title pass above
                                try:
                                    booktitle = item.find('./best_book/title').text
                                    booktitle = replace_all(booktitle, dic)
                                except (KeyError, AttributeError):
                                    booktitle = ""
                                book_fuzz = fuzz.token_set_ratio(
                                    booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find(
                                            './best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    try:
                                        authorid = item.find(
                                            './best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug(
                                        "Title Search found %s %s, authorid %s"
                                        % (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" %
                                    (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" %
                                 (type(e).__name__, str(e)))

            if authorid:
                lazylibrarian.importer.addAuthorToDB(refresh=False,
                                                     authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    newauth = finish - start
    logger.info("Added %s new author%s for %s" %
                (newauth, plural(newauth), seriesname))
    return newauth
Exemplo n.º 18
0
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid, or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available """
    # ALLOW_NEW gates fetching of new pages (cache-only when false);
    # LAST_NEW rate-limits the "disabled" warning (see 12hr check below)
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None

    if not reason:
        reason = ""

    myDB = database.DBConnection()
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID, ))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID, ))
    if item:
        # cacheLocation is the subdirectory name above, the full path from here on
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')

        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time, more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG[
                    'CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    if ALLOW_NEW:
                        os.remove(workfile)

        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug(
                        "getBookWork: Returning Cached entry for %s %s" %
                        (bookID, reason))
                else:
                    logger.debug(
                        "getBookWork: Returning Cached workpage for %s" %
                        bookID)
            else:
                # NOTE(review): the query selects 'SeriesName' but this reads
                # 'seriesName' - presumably row access is case-insensitive; confirm
                logger.debug(
                    "getBookWork: Returning Cached seriespage for %s" %
                    item['seriesName'])

            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # py3 text mode: replace undecodable bytes rather than raising
                # noinspection PyArgumentList
                with open(workfile, "r",
                          errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(
                    seriesname)

            librarything_wait()
            result, success = fetchURL(URL)
            if bookID and success:
                # whatwork returns xml containing a <link> to the real workpage;
                # fetch that page as the payload to cache
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split(
                            '</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item[
                            'BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split(
                                    '</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split(
                                        '</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" %
                                             (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' +
                                     item['BookName'])
                        success = True
            if success:
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                    if bookID:
                        logger.debug("getBookWork: Caching workpage for %s" %
                                     workfile)
                    else:
                        logger.debug(
                            "getBookWork: Caching series page for %s" %
                            workfile)
                    # return None if we got an error page back
                    if '</request><error>' in result:
                        return None
                return result
            else:
                if bookID:
                    logger.debug(
                        "getBookWork: Unable to cache workpage, got %s" %
                        result)
                else:
                    logger.debug(
                        "getBookWork: Unable to cache series page, got %s" %
                        result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
Exemplo n.º 19
0
def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item
    return a list of results, each entry in list containing percentage_match, title, provider, size, url
    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    """
    results = []

    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)

    book['searchterm'] = searchterm
    if bookid:
        book['bookid'] = bookid
    else:
        book['bookid'] = searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid,))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            # no book data to search with, fall back to a plain term search
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov, dltypes = IterateOverRSSSites()
        if nprov and dltypes != 'M':
            results += resultlist

    # reprocess into a consistent format - different provider types use
    # different key names for the same fields
    searchresults = []
    for res in results:  # renamed from 'item' so the parameter is not shadowed
        provider = ''
        title = ''
        url = ''
        size = ''
        date = ''
        mode = ''
        if 'dispname' in res:
            provider = res['dispname']
        elif 'nzbprov' in res:
            provider = res['nzbprov']
        elif 'tor_prov' in res:
            provider = res['tor_prov']
        elif 'rss_prov' in res:
            provider = res['rss_prov']
        if 'nzbtitle' in res:
            title = res['nzbtitle']
        if 'nzburl' in res:
            url = res['nzburl']
        if 'nzbsize' in res:
            size = res['nzbsize']
        if 'nzbdate' in res:
            date = res['nzbdate']
        if 'nzbmode' in res:
            mode = res['nzbmode']
        if 'tor_title' in res:
            title = res['tor_title']
        if 'tor_url' in res:
            url = res['tor_url']
        if 'tor_size' in res:
            size = res['tor_size']
        if 'tor_date' in res:
            date = res['tor_date']
        if 'tor_type' in res:
            mode = res['tor_type']

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'

            # check the prefix BEFORE encoding: on py3 bytes.startswith(str)
            # raises TypeError, so the old encode-first order crashed here
            if mode == 'torznab' and url.startswith('magnet'):
                mode = 'magnet'
            url = url.encode('utf-8')

            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                result = {'score': score, 'title': title, 'provider': provider, 'size': size, 'date': date,
                          'url': quote_plus(url), 'mode': mode}

                searchresults.append(result)

    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults
Exemplo n.º 20
0
def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid, and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. OpenLibrary image
            7. Google isbn search (if google has a link to book for sale)
            8. Google images search (if lazylibrarian config allows)

        src = cache, cover, goodreads, librarything, whatwork, googleisbn, openlibrary, googleimage
        If src is given, only that one source is tried and the image is cached under a
        source-specific suffix; with no src, sources are tried in order and the image is
        cached as the book's main cover.
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src

    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID,))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything  has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID,))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                           lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                    # if librarything has no image they return a 1x1 gif
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                try:
                    img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('workCoverImage not found in work page for %s' % bookID)

                try:
                    img = work.split('og:image')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        # was "if success:" which returned even for the empty 1x1 gif;
                        # elif matches the other sources and skips empty images
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('og:image not found in work page for %s' % bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        cmd = 'select BookName,AuthorName,BookLink,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID,))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img("book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src

        # try to get a cover from openlibrary
        if not src or src == 'openlibrary':
            # guard against item being None (no matching book row), like the sections above
            if item and item['BookISBN']:
                baseurl = 'https://openlibrary.org/api/books?format=json&jscmd=data&bibkeys=ISBN:'
                result, success = fetchURL(baseurl + item['BookISBN'])
                if success:
                    try:
                        source = json.loads(result)  # type: dict
                    except Exception as e:
                        logger.debug("OpenLibrary json error: %s" % e)
                        source = []

                    img = ''
                    if source:
                        # noinspection PyUnresolvedReferences
                        # list() so this works on python3 too, where keys() is a view
                        k = list(source.keys())[0]
                        try:
                            img = source[k]['cover']['medium']
                        except KeyError:
                            try:
                                img = source[k]['cover']['large']
                            except KeyError:
                                logger.debug("No openlibrary image for %s" % item['BookISBN'])

                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'openlibrary':
                            coverlink, success, _ = cache_img("book", bookID + '_ol', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty openlibrary image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching openlibrary cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'openlibrary'
                else:
                    logger.debug("OpenLibrary error: %s" % result)
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None

                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src

        if src == 'googleimage' or not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']:
            # try a google image search...
            # tbm=isch      search images
            # tbs=isz:l     large images
            # ift:jpg       jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None

                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        # this is the google image section, not goodreads
                        logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
    return None, src
Exemplo n.º 21
0
def getBookCover(bookID=None, src=None):
    """ Return link to a local file containing a book cover image for a bookid, and which source used.
        Try 1. Local file cached from goodreads/googlebooks when book was imported
            2. cover.jpg if we have the book
            3. LibraryThing cover image (if you have a dev key)
            4. LibraryThing whatwork (if available)
            5. Goodreads search (if book was imported from goodreads)
            6. Google isbn search (if google has a link to book for sale)
            7. Google images search (if lazylibrarian config allows)

        src = cache, cover, goodreads, librarything, whatwork, googleisbn, googleimage
        If src is given, only that one source is tried and the image is cached under a
        source-specific suffix; with no src, sources are tried in order and the image is
        cached as the book's main cover.
        Return None if no cover available. """
    if not bookID:
        logger.error("getBookCover- No bookID")
        return None, src

    if not src:
        src = ''
    logger.debug("Getting %s cover for %s" % (src, bookID))
    # noinspection PyBroadException
    try:
        cachedir = lazylibrarian.CACHEDIR
        coverfile = os.path.join(cachedir, "book", bookID + '.jpg')
        if not src or src == 'cache' or src == 'current':
            if os.path.isfile(coverfile):  # use cached image if there is one
                lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
                coverlink = 'cache/book/' + bookID + '.jpg'
                return coverlink, 'cache'
            elif src:
                lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
                return None, src

        myDB = database.DBConnection()
        if not src or src == 'cover':
            item = myDB.match('select BookFile from books where bookID=?', (bookID,))
            if item:
                bookfile = item['BookFile']
                if bookfile:  # we may have a cover.jpg in the same folder
                    bookdir = os.path.dirname(bookfile)
                    coverimg = os.path.join(bookdir, "cover.jpg")
                    if os.path.isfile(coverimg):
                        if src:
                            coverfile = os.path.join(cachedir, "book", bookID + '_cover.jpg')
                            coverlink = 'cache/book/' + bookID + '_cover.jpg'
                            logger.debug("Caching cover.jpg for %s" % bookID)
                        else:
                            coverlink = 'cache/book/' + bookID + '.jpg'
                            logger.debug("Caching cover.jpg for %s" % coverfile)
                        _ = safe_copy(coverimg, coverfile)
                        return coverlink, src
            if src:
                logger.debug('No cover.jpg found for %s' % bookID)
                return None, src

        # see if librarything  has a cover
        if not src or src == 'librarything':
            if lazylibrarian.CONFIG['LT_DEVKEY']:
                cmd = 'select BookISBN from books where bookID=?'
                item = myDB.match(cmd, (bookID,))
                if item and item['BookISBN']:
                    img = 'https://www.librarything.com/devkey/%s/large/isbn/%s' % (
                           lazylibrarian.CONFIG['LT_DEVKEY'], item['BookISBN'])
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_lt', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                    # if librarything has no image they return a 1x1 gif
                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        logger.debug('Got an empty librarything image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching librarything cover for %s" % bookID)
                        return coverlink, 'librarything'
                    else:
                        logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                else:
                    logger.debug("No isbn for %s" % bookID)
            if src:
                return None, src

        # see if librarything workpage has a cover
        if not src or src == 'whatwork':
            work = getBookWork(bookID, "Cover")
            if work:
                try:
                    img = work.split('workCoverImage')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('workCoverImage not found in work page for %s' % bookID)

                try:
                    img = work.split('og:image')[1].split('="')[1].split('"')[0]
                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_ww', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        # if librarything has no image they return a 1x1 gif
                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty whatwork image for %s [%s]' % (bookID, coverlink))
                        # was "if success:" which returned even for the empty 1x1 gif;
                        # elif matches the other sources and skips empty images
                        elif success:
                            logger.debug("Caching whatwork cover for %s" % bookID)
                            return coverlink, 'whatwork'
                        else:
                            logger.debug('Failed to cache image for %s [%s]' % (img, coverlink))
                    else:
                        logger.debug("No image found in work page for %s" % bookID)
                except IndexError:
                    logger.debug('og:image not found in work page for %s' % bookID)
            else:
                logger.debug('No work page for %s' % bookID)
            if src:
                return None, src

        cmd = 'select BookName,AuthorName,BookLink from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        item = myDB.match(cmd, (bookID,))
        safeparams = ''
        booklink = ''
        if item:
            title = safe_unicode(item['BookName'])
            author = safe_unicode(item['AuthorName'])
            if PY2:
                title = title.encode(lazylibrarian.SYS_ENCODING)
                author = author.encode(lazylibrarian.SYS_ENCODING)
            booklink = item['BookLink']
            safeparams = quote_plus("%s %s" % (author, title))

        # try to get a cover from goodreads
        if not src or src == 'goodreads':
            if booklink and 'goodreads' in booklink:
                # if the bookID is a goodreads one, we can call https://www.goodreads.com/book/show/{bookID}
                # and scrape the page for og:image
                # <meta property="og:image" content="https://i.gr-assets.com/images/S/photo.goodreads.com/books/
                # 1388267702i/16304._UY475_SS475_.jpg"/>
                # to get the cover
                result, success = fetchURL(booklink)
                if success:
                    try:
                        img = result.split('id="coverImage"')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        try:
                            img = result.split('og:image')[1].split('="')[1].split('"')[0]
                        except IndexError:
                            img = None
                    if img and img.startswith('http') and 'nocover' not in img and 'nophoto' not in img:
                        if src == 'goodreads':
                            coverlink, success, _ = cache_img("book", bookID + '_gr', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty goodreads image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching goodreads cover for %s %s" % (item['AuthorName'], item['BookName']))
                            return coverlink, 'goodreads'
                        else:
                            logger.debug("Error getting goodreads image for %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in goodreads page for %s" % bookID)
                else:
                    logger.debug("Error getting goodreads page %s, [%s]" % (booklink, result))
            if src:
                return None, src

        if not src or src == 'googleisbn':
            # try a google isbn page search...
            # there is no image returned if google doesn't have a link for buying the book
            if safeparams:
                URL = "http://www.google.com/search?q=ISBN+" + safeparams
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('imgurl=')[1].split('&imgrefurl')[0]
                    except IndexError:
                        try:
                            img = result.split('img src="')[1].split('"')[0]
                        except IndexError:
                            img = None

                    if img and img.startswith('http'):
                        if src:
                            coverlink, success, _ = cache_img("book", bookID + '_gi', img)
                        else:
                            coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                        data = ''
                        coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                        if os.path.isfile(coverfile):
                            with open(coverfile, 'rb') as f:
                                data = f.read()
                        if len(data) < 50:
                            logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                        elif success:
                            logger.debug("Caching google isbn cover for %s %s" %
                                         (item['AuthorName'], item['BookName']))
                            return coverlink, 'google isbn'
                        else:
                            logger.debug("Error caching google image %s, [%s]" % (img, coverlink))
                    else:
                        logger.debug("No image found in google isbn page for %s" % bookID)
                else:
                    logger.debug("Failed to fetch url from google")
            else:
                logger.debug("No parameters for google isbn search for %s" % bookID)
            if src:
                return None, src

        if src == 'googleimage' or not src and lazylibrarian.CONFIG['IMP_GOOGLEIMAGE']:
            # try a google image search...
            # tbm=isch      search images
            # tbs=isz:l     large images
            # ift:jpg       jpeg file type
            if safeparams:
                URL = "https://www.google.com/search?tbm=isch&tbs=isz:l,ift:jpg&as_q=" + safeparams + "+ebook"
                img = None
                result, success = fetchURL(URL)
                if success:
                    try:
                        img = result.split('url?q=')[1].split('">')[1].split('src="')[1].split('"')[0]
                    except IndexError:
                        img = None

                if img and img.startswith('http'):
                    if src:
                        coverlink, success, _ = cache_img("book", bookID + '_gb', img)
                    else:
                        coverlink, success, _ = cache_img("book", bookID, img, refresh=True)

                    data = ''
                    coverfile = os.path.join(lazylibrarian.DATADIR, coverlink)
                    if os.path.isfile(coverfile):
                        with open(coverfile, 'rb') as f:
                            data = f.read()
                    if len(data) < 50:
                        # this is the google image section, not goodreads
                        logger.debug('Got an empty google image for %s [%s]' % (bookID, coverlink))
                    elif success:
                        logger.debug("Caching google search cover for %s %s" %
                                     (item['AuthorName'], item['BookName']))
                        return coverlink, 'google image'
                    else:
                        logger.debug("Error getting google image %s, [%s]" % (img, coverlink))
                else:
                    logger.debug("No image found in google page for %s" % bookID)
            else:
                logger.debug("No parameters for google image search for %s" % bookID)
            if src:
                return None, src

        logger.debug("No image found from any configured source")
        return None, src
    except Exception:
        logger.error('Unhandled exception in getBookCover: %s' % traceback.format_exc())
    return None, src
Exemplo n.º 22
0
    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user

            Matching books are collected as a list of dicts and delivered via
            queue.put(); nothing is returned directly.
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            # counts real (non-cached) requests to the Google Books API
            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0
            title = ''
            authorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')

            fullterm = searchterm.replace(' <ll> ', ' ')
            logger.debug('Now searching Google Books API with searchterm: %s' %
                         fullterm)

            for api_value in api_strings:
                # build the query url for this search mode (isbn/title/author)
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + quote(api_value + searchterm)
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        # noinspection PyUnresolvedReferences
                        title = title.split(' (')[0]  # without any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace(
                        '"', '').strip()  # and no quotes
                    if PY2:
                        searchterm = searchterm.encode(
                            lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote(api_value + '"' + searchterm +
                                              '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    searchterm = searchterm.strip()
                    if PY2:
                        searchterm = searchterm.encode(
                            lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote_plus(api_value + '"' +
                                                   searchterm + '"')

                startindex = 0
                resultcount = 0
                ignored = 0
                # dummy value so the while loop below runs at least once;
                # replaced by the API's totalItems after the first request
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urlencode(self.params)

                        try:
                            jsonresults, in_cache = gb_json_request(URL)
                            if jsonresults is None:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn(
                                    'Found no results for %s with value: %s' %
                                    (api_value, searchterm))
                                break
                            else:
                                pass
                        except Exception as err:
                            if hasattr(err, 'reason'):
                                errmsg = err.reason
                            else:
                                errmsg = str(err)
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while'
                                % errmsg)
                            break

                        # Google Books returns results in pages of up to 40 items
                        startindex += 40

                        for item in jsonresults['items']:
                            total_count += 1

                            book = bookdict(item)
                            if not book['author']:
                                logger.debug(
                                    'Skipped a result without authorfield.')
                                no_author_count += 1
                                continue

                            if not book['name']:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(
                                lazylibrarian.CONFIG['IMP_PREFLANG'])
                            # "All" in the preferred-language list means accept
                            # every language; otherwise filter by booklang
                            if "All" not in valid_langs:
                                try:
                                    # skip if language is not in valid list -
                                    booklang = book['lang']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' %
                                            (book['name'], booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug(
                                        'Skipped %s where no language is found'
                                        % book['name'])
                                    continue

                            # fuzzy-match author/title against what was asked for
                            if authorname:
                                author_fuzz = fuzz.ratio(
                                    book['author'], authorname)
                            else:
                                author_fuzz = fuzz.ratio(
                                    book['author'], fullterm)

                            if title:
                                book_fuzz = fuzz.token_set_ratio(
                                    book['name'], title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(book['name']))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.token_set_ratio(
                                    book['name'], fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2,
                                               isbn_fuzz)

                            # normalise the title: drop quotes, turn ':' into '.'
                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(book['name'], dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace

                            # look up an existing AuthorID so the UI can link to it
                            AuthorID = ''
                            if book['author']:
                                # NOTE(review): the quote-doubling looks like leftover
                                # literal-SQL escaping; with a bound parameter it only
                                # matches stored names that literally contain "" — confirm
                                match = myDB.match(
                                    'SELECT AuthorID FROM authors WHERE AuthorName=?',
                                    (book['author'].replace('"', '""'), ))
                                if match:
                                    AuthorID = match['AuthorID']

                            resultlist.append({
                                'authorname':
                                book['author'],
                                'authorid':
                                AuthorID,
                                'bookid':
                                item['id'],
                                'bookname':
                                bookname,
                                'booksub':
                                book['sub'],
                                'bookisbn':
                                book['isbn'],
                                'bookpub':
                                book['pub'],
                                'bookdate':
                                book['date'],
                                'booklang':
                                book['lang'],
                                'booklink':
                                book['link'],
                                'bookrate':
                                float(book['rate']),
                                'bookrate_count':
                                book['rate_count'],
                                'bookimg':
                                book['img'],
                                'bookpages':
                                book['pages'],
                                'bookgenre':
                                book['genre'],
                                'bookdesc':
                                book['desc'],
                                'author_fuzz':
                                author_fuzz,
                                'book_fuzz':
                                book_fuzz,
                                'isbn_fuzz':
                                isbn_fuzz,
                                'highest_fuzz':
                                highest_fuzz,
                                'num_reviews':
                                book['ratings']
                            })

                            resultcount += 1

                except KeyError:
                    # no 'items' in the response: past the last page of results
                    break

                logger.debug(
                    "Returning %s result%s for (%s) with keyword: %s" %
                    (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" %
                         (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" %
                         (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" %
                         (no_author_count, plural(no_author_count)))
            logger.debug(
                'The Google Books API was hit %s time%s for searchterm: %s' %
                (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' %
                         traceback.format_exc())
Exemplo n.º 23
0
def getBookWork(bookID=None, reason=None, seriesID=None):
    """ return the contents of the LibraryThing workpage for the given bookid, or seriespage if seriesID given
        preferably from the cache. If not already cached cache the results
        Return None if no workpage/seriespage available

        bookID:   database BookID to fetch a whatwork workpage for
        reason:   optional text used only in debug log messages
        seriesID: database SeriesID to fetch a series page for (used when bookID is None)
    """
    # module-level throttles: ALLOW_NEW gates new fetches, LAST_NEW rate-limits the nag message
    global ALLOW_NEW, LAST_NEW
    if not bookID and not seriesID:
        logger.error("getBookWork - No bookID or seriesID")
        return None

    if not reason:
        reason = ""

    myDB = database.DBConnection()
    if bookID:
        cmd = 'select BookName,AuthorName,BookISBN from books,authors where bookID=?'
        cmd += ' and books.AuthorID = authors.AuthorID'
        cacheLocation = "WorkCache"
        item = myDB.match(cmd, (bookID,))
    else:
        cmd = 'select SeriesName from series where SeriesID=?'
        cacheLocation = "SeriesCache"
        item = myDB.match(cmd, (seriesID,))
    if item:
        cacheLocation = os.path.join(lazylibrarian.CACHEDIR, cacheLocation)
        if bookID:
            workfile = os.path.join(cacheLocation, str(bookID) + '.html')
        else:
            workfile = os.path.join(cacheLocation, str(seriesID) + '.html')

        # does the workpage need to expire? For now only expire if it was an error page
        # (small file) or a series page as librarything might get better info over time, more series members etc
        if os.path.isfile(workfile):
            if seriesID or os.path.getsize(workfile) < 500:
                cache_modified_time = os.stat(workfile).st_mtime
                time_now = time.time()
                expiry = lazylibrarian.CONFIG['CACHE_AGE'] * 24 * 60 * 60  # expire cache after this many seconds
                if cache_modified_time < time_now - expiry:
                    # Cache entry is too old, delete it
                    if ALLOW_NEW:
                        os.remove(workfile)

        if os.path.isfile(workfile):
            # use cached file if possible to speed up refreshactiveauthors and librarysync re-runs
            lazylibrarian.CACHE_HIT = int(lazylibrarian.CACHE_HIT) + 1
            if bookID:
                if reason:
                    logger.debug("getBookWork: Returning Cached entry for %s %s" % (bookID, reason))
                else:
                    logger.debug("getBookWork: Returning Cached workpage for %s" % bookID)
            else:
                # NOTE(review): key is 'seriesName' but the query selects SeriesName —
                # presumably the row wrapper is case-insensitive (sqlite3.Row is); confirm
                logger.debug("getBookWork: Returning Cached seriespage for %s" % item['seriesName'])

            if PY2:
                with open(workfile, "r") as cachefile:
                    source = cachefile.read()
            else:
                # noinspection PyArgumentList
                with open(workfile, "r", errors="backslashreplace") as cachefile:
                    source = cachefile.read()
            return source
        else:
            lazylibrarian.CACHE_MISS = int(lazylibrarian.CACHE_MISS) + 1
            if not ALLOW_NEW:
                # don't nag. Show message no more than every 12 hrs
                timenow = int(time.time())
                if check_int(LAST_NEW, 0) + 43200 < timenow:
                    logger.warn("New WhatWork is disabled")
                    LAST_NEW = timenow
                return None
            if bookID:
                title = safe_unicode(item['BookName'])
                author = safe_unicode(item['AuthorName'])
                if PY2:
                    title = title.encode(lazylibrarian.SYS_ENCODING)
                    author = author.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/api/whatwork.php?author=%s&title=%s' % \
                      (quote_plus(author), quote_plus(title))
            else:
                seriesname = safe_unicode(item['seriesName'])
                if PY2:
                    seriesname = seriesname.encode(lazylibrarian.SYS_ENCODING)
                URL = 'http://www.librarything.com/series/%s' % quote_plus(seriesname)

            # rate-limit all librarything requests, then fetch
            librarything_wait()
            result, success = fetchURL(URL)
            if bookID and success:
                # whatwork returns an xml-ish payload whose <link> is the real workpage;
                # fetch that. If there is no <link>, fall back to an isbn lookup.
                # noinspection PyBroadException
                try:
                    workpage = result.split('<link>')[1].split('</link>')[0]
                    librarything_wait()
                    result, success = fetchURL(workpage)
                except Exception:
                    try:
                        errmsg = result.split('<error>')[1].split('</error>')[0]
                    except IndexError:
                        errmsg = "Unknown Error"
                    # if no workpage link, try isbn instead
                    if item['BookISBN']:
                        URL = 'http://www.librarything.com/api/whatwork.php?isbn=' + item['BookISBN']
                        librarything_wait()
                        result, success = fetchURL(URL)
                        if success:
                            # noinspection PyBroadException
                            try:
                                workpage = result.split('<link>')[1].split('</link>')[0]
                                librarything_wait()
                                result, success = fetchURL(workpage)
                            except Exception:
                                # no workpage link found by isbn
                                try:
                                    errmsg = result.split('<error>')[1].split('</error>')[0]
                                except IndexError:
                                    errmsg = "Unknown Error"
                                # still cache if whatwork returned a result without a link, so we don't keep retrying
                                logger.debug("Librarything: [%s] for ISBN %s" % (errmsg, item['BookISBN']))
                                success = True
                    else:
                        # still cache if whatwork returned a result without a link, so we don't keep retrying
                        msg = "Librarything: [" + errmsg + "] for "
                        logger.debug(msg + item['AuthorName'] + ' ' + item['BookName'])
                        success = True
            if success:
                # cache whatever we ended up with (workpage, series page, or error page)
                with open(workfile, "w") as cachefile:
                    cachefile.write(result)
                    if bookID:
                        logger.debug("getBookWork: Caching workpage for %s" % workfile)
                    else:
                        logger.debug("getBookWork: Caching series page for %s" % workfile)
                    # return None if we got an error page back
                    if '</request><error>' in result:
                        return None
                return result
            else:
                if bookID:
                    logger.debug("getBookWork: Unable to cache workpage, got %s" % result)
                else:
                    logger.debug("getBookWork: Unable to cache series page, got %s" % result)
            return None
    else:
        if bookID:
            logger.debug('Get Book Work - Invalid bookID [%s]' % bookID)
        else:
            logger.debug('Get Book Work - Invalid seriesID [%s]' % seriesID)
        return None
Exemplo n.º 24
0
    def find_results(self, searchterm=None, queue=None):
        """ GoogleBooks performs much better if we search for author OR title
            not both at once, so if searchterm is not isbn, two searches needed.
            Lazylibrarian searches use <ll> to separate title from author in searchterm
            If this token isn't present, it's an isbn or searchterm as supplied by user

            Matching books are collected as a list of dicts and delivered via
            queue.put(); nothing is returned directly.
        """
        try:
            myDB = database.DBConnection()
            resultlist = []
            # See if we should check ISBN field, otherwise ignore it
            api_strings = ['inauthor:', 'intitle:']
            if is_valid_isbn(searchterm):
                api_strings = ['isbn:']

            # counts real (non-cached) requests to the Google Books API
            api_hits = 0

            ignored = 0
            total_count = 0
            no_author_count = 0
            title = ''
            authorname = ''

            if ' <ll> ' in searchterm:  # special token separates title from author
                title, authorname = searchterm.split(' <ll> ')

            fullterm = searchterm.replace(' <ll> ', ' ')
            logger.debug('Now searching Google Books API with searchterm: %s' % fullterm)

            for api_value in api_strings:
                # build the query url for this search mode (isbn/title/author)
                set_url = self.url
                if api_value == "isbn:":
                    set_url = set_url + quote(api_value + searchterm)
                elif api_value == 'intitle:':
                    searchterm = fullterm
                    if title:  # just search for title
                        # noinspection PyUnresolvedReferences
                        title = title.split(' (')[0]  # without any series info
                        searchterm = title
                    searchterm = searchterm.replace("'", "").replace('"', '').strip()  # and no quotes
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote(api_value + '"' + searchterm + '"')
                elif api_value == 'inauthor:':
                    searchterm = fullterm
                    if authorname:
                        searchterm = authorname  # just search for author
                    searchterm = searchterm.strip()
                    if PY2:
                        searchterm = searchterm.encode(lazylibrarian.SYS_ENCODING)
                    set_url = set_url + quote_plus(api_value + '"' + searchterm + '"')

                startindex = 0
                resultcount = 0
                ignored = 0
                # dummy value so the while loop below runs at least once;
                # replaced by the API's totalItems after the first request
                number_results = 1
                total_count = 0
                no_author_count = 0
                try:
                    while startindex < number_results:

                        self.params['startIndex'] = startindex
                        URL = set_url + '&' + urlencode(self.params)

                        try:
                            jsonresults, in_cache = gb_json_request(URL)
                            if jsonresults is None:
                                number_results = 0
                            else:
                                if not in_cache:
                                    api_hits += 1
                                number_results = jsonresults['totalItems']
                                logger.debug('Searching url: ' + URL)
                            if number_results == 0:
                                logger.warn('Found no results for %s with value: %s' % (api_value, searchterm))
                                break
                            else:
                                pass
                        except Exception as err:
                            if hasattr(err, 'reason'):
                                errmsg = err.reason
                            else:
                                errmsg = str(err)
                            logger.warn(
                                'Google Books API Error [%s]: Check your API key or wait a while' % errmsg)
                            break

                        # Google Books returns results in pages of up to 40 items
                        startindex += 40

                        for item in jsonresults['items']:
                            total_count += 1

                            book = bookdict(item)
                            if not book['author']:
                                logger.debug('Skipped a result without authorfield.')
                                no_author_count += 1
                                continue

                            if not book['name']:
                                logger.debug('Skipped a result without title.')
                                continue

                            valid_langs = getList(lazylibrarian.CONFIG['IMP_PREFLANG'])
                            # "All" in the preferred-language list means accept
                            # every language; otherwise filter by booklang
                            if "All" not in valid_langs:
                                try:
                                    # skip if language is not in valid list -
                                    booklang = book['lang']
                                    if booklang not in valid_langs:
                                        logger.debug(
                                            'Skipped %s with language %s' % (book['name'], booklang))
                                        ignored += 1
                                        continue
                                except KeyError:
                                    ignored += 1
                                    logger.debug('Skipped %s where no language is found' % book['name'])
                                    continue

                            # fuzzy-match author/title against what was asked for
                            if authorname:
                                author_fuzz = fuzz.ratio(book['author'], authorname)
                            else:
                                author_fuzz = fuzz.ratio(book['author'], fullterm)

                            if title:
                                book_fuzz = fuzz.token_set_ratio(book['name'], title)
                                # lose a point for each extra word in the fuzzy matches so we get the closest match
                                words = len(getList(book['name']))
                                words -= len(getList(title))
                                book_fuzz -= abs(words)
                            else:
                                book_fuzz = fuzz.token_set_ratio(book['name'], fullterm)

                            isbn_fuzz = 0
                            if is_valid_isbn(fullterm):
                                isbn_fuzz = 100

                            highest_fuzz = max((author_fuzz + book_fuzz) / 2, isbn_fuzz)

                            # normalise the title: drop quotes, turn ':' into '.'
                            dic = {':': '.', '"': '', '\'': ''}
                            bookname = replace_all(book['name'], dic)

                            bookname = unaccented(bookname)
                            bookname = bookname.strip()  # strip whitespace

                            # look up an existing AuthorID so the UI can link to it
                            AuthorID = ''
                            if book['author']:
                                # NOTE(review): the quote-doubling looks like leftover
                                # literal-SQL escaping; with a bound parameter it only
                                # matches stored names that literally contain "" — confirm
                                match = myDB.match(
                                    'SELECT AuthorID FROM authors WHERE AuthorName=?', (
                                        book['author'].replace('"', '""'),))
                                if match:
                                    AuthorID = match['AuthorID']

                            resultlist.append({
                                'authorname': book['author'],
                                'authorid': AuthorID,
                                'bookid': item['id'],
                                'bookname': bookname,
                                'booksub': book['sub'],
                                'bookisbn': book['isbn'],
                                'bookpub': book['pub'],
                                'bookdate': book['date'],
                                'booklang': book['lang'],
                                'booklink': book['link'],
                                'bookrate': float(book['rate']),
                                'bookrate_count': book['rate_count'],
                                'bookimg': book['img'],
                                'bookpages': book['pages'],
                                'bookgenre': book['genre'],
                                'bookdesc': book['desc'],
                                'author_fuzz': author_fuzz,
                                'book_fuzz': book_fuzz,
                                'isbn_fuzz': isbn_fuzz,
                                'highest_fuzz': highest_fuzz,
                                'num_reviews': book['ratings']
                            })

                            resultcount += 1

                except KeyError:
                    # no 'items' in the response: past the last page of results
                    break

                logger.debug("Returning %s result%s for (%s) with keyword: %s" %
                             (resultcount, plural(resultcount), api_value, searchterm))

            logger.debug("Found %s result%s" % (total_count, plural(total_count)))
            logger.debug("Removed %s unwanted language result%s" % (ignored, plural(ignored)))
            logger.debug("Removed %s book%s with no author" % (no_author_count, plural(no_author_count)))
            logger.debug('The Google Books API was hit %s time%s for searchterm: %s' %
                         (api_hits, plural(api_hits), fullterm))
            queue.put(resultlist)

        except Exception:
            logger.error('Unhandled exception in GB.find_results: %s' % traceback.format_exc())
Exemplo n.º 25
0
    def _RecentBooks(self, **kwargs):
        """ Build an OPDS acquisition feed of books with Status='Open',
            newest first (by BookLibrary date), and store it in self.data.

            kwargs:
                index: integer offset into the result set for paging (default 0)
                query: optional substring filter applied to BookName
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {
            'title': 'LazyLibrarian OPDS - Recent Books',
            'id': 'Recent Books',
            'updated': now()
        }
        links = []
        entries = []
        # navigation links: home, self, and the opensearch description document
        links.append(
            getLink(href=self.opdsroot,
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='start', title='Home'))
        links.append(
            getLink(href='%s?cmd=RecentBooks' % self.opdsroot,
                    ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                    rel='self'))
        links.append(
            getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                    ftype='application/opensearchdescription+xml',
                    rel='search', title='Search Books'))
        # AudioFile must be selected too: the loop below falls back to it when
        # there is no BookFile (the previous query omitted it, so that branch raised)
        cmd = "select BookName,BookID,BookLibrary,BookDate,BookImg,BookDesc,BookAdded,"
        cmd += "BookFile,AudioFile,AuthorID from books where Status='Open' "
        if 'query' in kwargs:
            # double up single quotes so a user-supplied term cannot break out
            # of the SQL string literal (injection / syntax error on quotes)
            cmd += "AND BookName LIKE '%" + kwargs['query'].replace("'", "''") + "%' "
        cmd += "order by BookLibrary DESC"
        results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            mime_type = None
            rel = 'file'
            if book['BookFile']:
                # a book may have several formats; multiLink returns a mime type
                # for a multi-format entry, or None if there is only one file
                mime_type = self.multiLink(book['BookFile'], book['BookID'])
                if mime_type:
                    rel = 'multi'
                else:
                    mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])
            if mime_type:
                title = makeUnicode(book['BookName'])
                entry = {'title': escape(title),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookLibrary']),
                         'href': '%s?cmd=Serve&amp;bookid=%s' %
                                 (self.opdsroot, quote_plus(book['BookID'])),
                         'kind': 'acquisition',
                         'rel': rel,
                         'type': mime_type}
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    # enrich the entry with author name, cover image and description
                    author = myDB.match(
                        "SELECT AuthorName from authors WHERE AuthorID=?",
                        (book['AuthorID'],))
                    author = makeUnicode(author['AuthorName'])
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['thumbnail'] = entry['image']
                    entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                    entry['author'] = escape('%s' % author)
                else:
                    entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
                entries.append(entry)

        # paging: next/previous links when more results exist than one page holds
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentBooks&amp;index=%s' %
                             (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                        rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentBooks&amp;index=%s' %
                             (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                        rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s book%s" %
                     (len(entries), plural(len(entries))))
        self.data = feed
        return
Exemplo n.º 26
0
def searchItem(item=None, bookid=None, cat=None):
    """
    Call all active search providers to search for item.

    item = searchterm to use for general search
    bookid = link to data for book/audio searches
    cat = category to search [general, book, audio]
    Return a list of result dicts, each containing:
        score (percentage match), title, provider, size, date, url (quoted), mode
    """
    results = []

    if not item:
        return results

    book = {}
    searchterm = unaccented_str(item)
    book['searchterm'] = searchterm
    # general searches have no bookid, so fall back to the searchterm as key
    book['bookid'] = bookid if bookid else searchterm

    if cat in ['book', 'audio']:
        myDB = database.DBConnection()
        cmd = 'SELECT authorName,bookName,bookSub from books,authors WHERE books.AuthorID=authors.AuthorID'
        cmd += ' and bookID=?'
        match = myDB.match(cmd, (bookid,))
        if match:
            book['authorName'] = match['authorName']
            book['bookName'] = match['bookName']
            book['bookSub'] = match['bookSub']
        else:
            # no book details available for this id, do a plain term search
            logger.debug('Forcing general search')
            cat = 'general'

    nprov = lazylibrarian.USE_NZB() + lazylibrarian.USE_TOR() + lazylibrarian.USE_RSS() + lazylibrarian.USE_DIRECT()
    logger.debug('Searching %s provider%s (%s) for %s' % (nprov, plural(nprov), cat, searchterm))

    if lazylibrarian.USE_NZB():
        resultlist, nprov = IterateOverNewzNabSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_TOR():
        resultlist, nprov = IterateOverTorrentSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_DIRECT():
        resultlist, nprov = IterateOverDirectSites(book, cat)
        if nprov:
            results += resultlist
    if lazylibrarian.USE_RSS():
        resultlist, nprov = IterateOverRSSSites()
        if nprov:
            results += resultlist

    def _field(res, tor_key, nzb_key):
        # nzb and torrent providers use different key names for the same
        # fields; torrent keys take precedence, matching the original
        # sequential if-chain where the tor_* assignments came last
        if tor_key in res:
            return res[tor_key]
        return res.get(nzb_key, '')

    # reprocess to get consistent results
    # NB: loop variable renamed from 'item' so it no longer shadows the parameter
    searchresults = []
    for res in results:
        title = _field(res, 'tor_title', 'nzbtitle')
        url = _field(res, 'tor_url', 'nzburl')
        provider = _field(res, 'tor_prov', 'nzbprov')
        size = _field(res, 'tor_size', 'nzbsize')
        date = _field(res, 'tor_date', 'nzbdate')
        mode = _field(res, 'tor_type', 'nzbmode')

        if title and provider and mode and url:
            # Not all results have a date or a size
            if not date:
                date = 'Fri, 01 Jan 1970 00:00:00 +0100'
            if not size:
                size = '1000'

            # calculate match percentage - torrents might have words_with_underscore_separator
            score = fuzz.token_set_ratio(searchterm, title.replace('_', ' '))
            # lose a point for each extra word in the title so we get the closest match
            words = len(getList(searchterm))
            words -= len(getList(title))
            score -= abs(words)
            if score >= 40:  # ignore wildly wrong results?
                searchresults.append({'score': score, 'title': title, 'provider': provider,
                                      'size': size, 'date': date, 'url': quote_plus(url),
                                      'mode': mode})

    logger.debug('Found %s %s results for %s' % (len(searchresults), cat, searchterm))
    return searchresults
Exemplo n.º 27
0
    def _Author(self, **kwargs):
        """Build an OPDS acquisition feed of one author's available books.

        kwargs: authorid (required), index (pagination offset), query (title filter).
        Sets self.data to the feed dict, or to an error feed on bad input.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        if 'authorid' not in kwargs:
            self.data = self._error_with_message('No Author Provided')
            return
        links = []
        entries = []
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))
        author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?", (kwargs['authorid'],))
        if not author:
            # an unknown id would otherwise crash on author['AuthorName']
            self.data = self._error_with_message('Unknown Author')
            return
        author = makeUnicode(author['AuthorName'])
        cmd = "SELECT BookName,BookDate,BookID,BookAdded,BookDesc,BookImg,BookFile,AudioFile from books WHERE "
        params = []
        if 'query' in kwargs:
            # parameterised LIKE instead of string concatenation (SQL injection risk)
            cmd += "BookName LIKE ? AND "
            params.append('%' + kwargs['query'] + '%')
        cmd += "(Status='Open' or AudioStatus='Open') and AuthorID=? order by BookDate DESC"
        params.append(kwargs['authorid'])
        results = myDB.select(cmd, tuple(params))
        page = results[index:(index + self.PAGE_SIZE)]

        for book in page:
            # only list entries we can actually serve a file for
            mime_type = None
            if book['BookFile']:
                mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])

            if mime_type:
                entry = {'title': escape('%s (%s)' % (book['BookName'], book['BookDate'])),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookAdded']),
                         # quote_plus for URL safety, consistent with _RecentBooks
                         'href': '%s?cmd=Serve&amp;bookid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                         'kind': 'acquisition',
                         'rel': 'file',
                         'type': mime_type}
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['content'] = escape('%s - %s' % (book['BookName'], book['BookDesc']))
                    entry['author'] = escape('%s' % author)
                else:
                    entry['content'] = escape('%s (%s)' % (book['BookName'], book['BookAdded']))
                entries.append(entry)

        feed = {}
        authorname = '%s (%s)' % (escape(author), len(entries))
        feed['title'] = 'LazyLibrarian OPDS - %s' % authorname
        feed['id'] = 'author:%s' % escape(kwargs['authorid'])
        feed['updated'] = now()
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=Authors' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        # pagination: next/previous links when more than one page of results
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=Author&amp;authorid=%s&amp;index=%s' % (self.opdsroot,
                                                                             quote_plus(kwargs['authorid']),
                                                                             index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=Author&amp;authorid=%s&amp;index=%s' % (self.opdsroot,
                                                                             quote_plus(kwargs['authorid']),
                                                                             index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))
        feed['links'] = links
        feed['entries'] = entries
        self.data = feed
        logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
        return
Exemplo n.º 28
0
def getSeriesAuthors(seriesid):
    """ Get a list of authors contributing to a series
        and import those authors (and their books) into the database
        Return how many authors you added """
    myDB = database.DBConnection()
    # snapshot the author count so we can report how many authors get added
    result = myDB.match("select count(*) as counter from authors")
    start = int(result['counter'])
    result = myDB.match('select SeriesName from series where SeriesID=?', (seriesid,))
    seriesname = result['SeriesName']
    members = getSeriesMembers(seriesid)
    # quote characters (curly and straight) stripped before fuzzy matching
    dic = {u'\u2018': "", u'\u2019': "", u'\u201c': '', u'\u201d': '', "'": "", '"': ''}

    if members:
        myDB = database.DBConnection()
        for member in members:
            # member appears to be (order, bookname, authorname, workid, authorid)
            # order = member[0]
            bookname = member[1]
            authorname = member[2]
            # workid = member[3]
            authorid = member[4]
            bookname = replace_all(bookname, dic)
            if not authorid:
                # goodreads gives us all the info we need, librarything/google doesn't
                base_url = 'https://www.goodreads.com/search.xml?q='
                params = {"key": lazylibrarian.CONFIG['GR_API']}
                # first pass: search goodreads on "book title + author name"
                searchname = bookname + ' ' + authorname
                searchname = cleanName(unaccented(searchname))
                if PY2:
                    # goodreads request needs bytes under python2
                    searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                searchterm = quote_plus(searchname)
                set_url = base_url + searchterm + '&' + urlencode(params)
                try:
                    rootxml, in_cache = gr_xml_request(set_url)
                    if rootxml is None:
                        logger.warn('Error getting XML for %s' % searchname)
                    else:
                        # NOTE(review): ElementTree.getiterator() was removed in
                        # python 3.9; iter() is the replacement -- confirm target version
                        resultxml = rootxml.getiterator('work')
                        for item in resultxml:
                            try:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                            except (KeyError, AttributeError):
                                booktitle = ""
                            book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                            # only accept a near-exact title match
                            if book_fuzz >= 98:
                                try:
                                    author = item.find('./best_book/author/name').text
                                except (KeyError, AttributeError):
                                    author = ""
                                # try:
                                #     workid = item.find('./work/id').text
                                # except (KeyError, AttributeError):
                                #     workid = ""
                                try:
                                    authorid = item.find('./best_book/author/id').text
                                except (KeyError, AttributeError):
                                    authorid = ""
                                logger.debug("Author Search found %s %s, authorid %s" %
                                             (author, booktitle, authorid))
                                break
                    if not authorid:  # try again with title only
                        # second pass: same search without the author name
                        searchname = cleanName(unaccented(bookname))
                        if PY2:
                            searchname = searchname.encode(lazylibrarian.SYS_ENCODING)
                        searchterm = quote_plus(searchname)
                        set_url = base_url + searchterm + '&' + urlencode(params)
                        rootxml, in_cache = gr_xml_request(set_url)
                        if rootxml is None:
                            logger.warn('Error getting XML for %s' % searchname)
                        else:
                            resultxml = rootxml.getiterator('work')
                            for item in resultxml:
                                booktitle = item.find('./best_book/title').text
                                booktitle = replace_all(booktitle, dic)
                                book_fuzz = fuzz.token_set_ratio(booktitle, bookname)
                                if book_fuzz >= 98:
                                    try:
                                        author = item.find('./best_book/author/name').text
                                    except (KeyError, AttributeError):
                                        author = ""
                                    # try:
                                    #     workid = item.find('./work/id').text
                                    # except (KeyError, AttributeError):
                                    #     workid = ""
                                    try:
                                        authorid = item.find('./best_book/author/id').text
                                    except (KeyError, AttributeError):
                                        authorid = ""
                                    logger.debug("Title Search found %s %s, authorid %s" %
                                                 (author, booktitle, authorid))
                                    break
                    if not authorid:
                        logger.warn("GoodReads doesn't know about %s %s" % (authorname, bookname))
                except Exception as e:
                    logger.error("Error finding goodreads results: %s %s" % (type(e).__name__, str(e)))

            if authorid:
                # import the author (and their books); refresh=False skips re-import
                lazylibrarian.importer.addAuthorToDB(refresh=False, authorid=authorid)

    result = myDB.match("select count(*) as counter from authors")
    finish = int(result['counter'])
    # difference in author count == number of new authors imported
    newauth = finish - start
    logger.info("Added %s new author%s for %s" % (newauth, plural(newauth), seriesname))
    return newauth
Exemplo n.º 29
0
    def _RecentBooks(self, **kwargs):
        """Build an OPDS acquisition feed of recently added ebooks.

        kwargs: index (pagination offset), query (title filter).
        Sets self.data to the feed dict.
        """
        index = 0
        if 'index' in kwargs:
            index = check_int(kwargs['index'], 0)
        myDB = database.DBConnection()
        feed = {'title': 'LazyLibrarian OPDS - Recent Books', 'id': 'Recent Books', 'updated': now()}
        links = []
        entries = []
        links.append(getLink(href=self.opdsroot, ftype='application/atom+xml; profile=opds-catalog; kind=navigation',
                             rel='start', title='Home'))
        links.append(getLink(href='%s?cmd=RecentBooks' % self.opdsroot,
                             ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='self'))
        links.append(getLink(href='%s/opensearchbooks.xml' % self.searchroot,
                             ftype='application/opensearchdescription+xml', rel='search', title='Search Books'))
        # AudioFile must be selected: the loop below reads book['AudioFile'],
        # which raised IndexError when it was missing from the column list
        cmd = "select BookName,BookID,BookLibrary,BookDate,BookImg,BookDesc,BookAdded,BookFile,AudioFile,AuthorID "
        cmd += "from books where Status='Open' "
        params = []
        if 'query' in kwargs:
            # parameterised LIKE instead of string concatenation (SQL injection risk)
            cmd += "AND BookName LIKE ? "
            params.append('%' + kwargs['query'] + '%')
        cmd += "order by BookLibrary DESC"
        if params:
            results = myDB.select(cmd, tuple(params))
        else:
            results = myDB.select(cmd)
        page = results[index:(index + self.PAGE_SIZE)]
        for book in page:
            # only list entries we can actually serve a file for
            mime_type = None
            if book['BookFile']:
                mime_type = mimeType(book['BookFile'])
            elif book['AudioFile']:
                mime_type = mimeType(book['AudioFile'])
            if mime_type:
                title = makeUnicode(book['BookName'])
                entry = {'title': escape(title),
                         'id': escape('book:%s' % book['BookID']),
                         'updated': opdstime(book['BookLibrary']),
                         'href': '%s?cmd=Serve&amp;bookid=%s' % (self.opdsroot, quote_plus(book['BookID'])),
                         'kind': 'acquisition',
                         'rel': 'file',
                         'type': mime_type}
                if lazylibrarian.CONFIG['OPDS_METAINFO']:
                    author = myDB.match("SELECT AuthorName from authors WHERE AuthorID=?", (book['AuthorID'],))
                    # guard: don't crash the whole feed on a missing author row
                    author = makeUnicode(author['AuthorName']) if author else ''
                    entry['image'] = self.searchroot + '/' + book['BookImg']
                    entry['content'] = escape('%s - %s' % (title, book['BookDesc']))
                    entry['author'] = escape('%s' % author)
                else:
                    entry['content'] = escape('%s (%s)' % (title, book['BookAdded']))
                entries.append(entry)

        # pagination: next/previous links when more than one page of results
        if len(results) > (index + self.PAGE_SIZE):
            links.append(
                getLink(href='%s?cmd=RecentBooks&amp;index=%s' % (self.opdsroot, index + self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='next'))
        if index >= self.PAGE_SIZE:
            links.append(
                getLink(href='%s?cmd=RecentBooks&amp;index=%s' % (self.opdsroot, index - self.PAGE_SIZE),
                        ftype='application/atom+xml; profile=opds-catalog; kind=navigation', rel='previous'))

        feed['links'] = links
        feed['entries'] = entries
        logger.debug("Returning %s book%s" % (len(entries), plural(len(entries))))
        self.data = feed
        return