Example #1
def _make_api_request(url, session=None, extra_headers=None):
    # avoid a mutable default argument: a shared dict would leak state
    # between calls if declared as `extra_headers={}`
    extra_headers = extra_headers or {}
    while True:
        if debug:
            output.warning("Mangadex API: requesting -> " + url)
        try:
            # use the supplied session when given, otherwise issue a one-off
            # request; both paths merge the same headers
            requester = session if session else requests
            r = requester.get('https://api.mangadex.org/' + url.strip('/'),
                              headers={
                                  **MangadexV5Series.headers,
                                  **extra_headers
                              })
        except requests.exceptions.ConnectionError:
            output.error(
                "Mangadex API: request to endpoint failed: {}".format(url))
            raise exceptions.ScrapingError
        if r.status_code == 200:
            return r
        elif r.status_code == 429:
            retry_delay = int(r.headers["retry-after"])
            output.warning(
                "Mangadex API: wait {} seconds...".format(retry_delay))
            time.sleep(retry_delay)
        else:
            output.error("Mangadex API: got bad status code {}".format(
                r.status_code))
            return r
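A hedged usage sketch (`manga_id` and `req_session` are illustrative names, not taken from the project): the helper sleeps through 429 rate limiting internally, so a caller only needs to check the final status code:

r = _make_api_request('manga/{}'.format(manga_id), session=req_session)
if r.status_code == 200:
    manga = r.json()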
Example #2
def test_database():
    """Runs a database sanity test."""
    sanity_tester = sanity.DatabaseSanity(Base, engine)
    sanity_tester.test()
    if sanity_tester.errors:
        for error in sanity_tester.errors:
            # split the first token (the offending database object) from the
            # rest of the message so it can be highlighted
            err_target, err_msg = str(error).split(' ', 1)
            message = ' '.join([click.style(err_target, bold=True), err_msg])
            output.warning(message)
        output.error('Database has failed sanity check; '
                     'run `cu2 repair-db` to repair database')
        exit(1)
Example #3
    def get(self, use_db=True):
        """Downloads the chapter if it is available.

        Optionally does not remove the chapter from the database or mark it
        as downloaded if `use_db` is set to False.
        """
        if self.available():
            self.download()
            if use_db:
                self.mark_downloaded()
        elif use_db:
            output.warning('Removing {} {}: missing from remote'.format(
                self.name, self.chapter))
            self.db_remove()
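A minimal usage sketch (the URL is a placeholder; `utility.chapter_by_url` is borrowed from Example #6): passing `use_db=False` downloads the chapter without marking it downloaded or removing a missing chapter from the database:

chapter = utility.chapter_by_url('https://mangadex.org/chapter/<id>')
chapter.get(use_db=False)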
Example #4
def follow(urls, directory, download, ignore):
    """Follow a series."""
    chapters = []
    for url in urls:
        try:
            series = utility.series_by_url(url)
        except exceptions.ScrapingError:
            output.warning('Scraping error ({})'.format(url))
            continue
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, url))
            continue
        if not series:
            output.warning('Invalid URL "{}"'.format(url))
            continue
        series.directory = directory
        if ignore:
            series.follow(ignore=True)
            output.chapter('Ignoring {} chapters'.format(len(series.chapters)))
        else:
            series.follow()
            chapters += db.Chapter.find_new(alias=series.alias)
        del series

    if download:
        output.chapter('Downloading {} chapters'.format(len(chapters)))
        for chapter in chapters:
            try:
                chapter.get()
            except exceptions.LoginError as e:
                output.warning('Could not download {c.alias} {c.chapter}: {e}'
                               .format(c=chapter, e=e.message))
            del chapter
Example #5
    def page_download_task(page_num, r, page_url=None):
        """Saves the response body of a single request, returning the file
        handle and the passed-through page number to allow for
        non-sequential downloads in parallel.
        """
        ext = BaseChapter.guess_extension(r.headers.get('content-type'))
        f = NamedTemporaryFile(suffix=ext, delete=False)
        retries = 20
        while retries > 0:
            try:
                for chunk in r.iter_content(chunk_size=4096):
                    if chunk:
                        f.write(chunk)
                retries = 0
            # basically ignores this exception that requests throws.  my
            # understanding is that it is raised when you attempt to
            # iter_content() over the same content twice.  don't understand
            # how that situation arises with the current code but it did
            # somehow.
            # https://stackoverflow.com/questions/45379903/
            except requests.exceptions.StreamConsumedError:
                pass
            # when under heavy load, Mangadex will often kill the connection
            # in the middle of an image download.  in the original
            # architecture, the requests are all opened in the scrapers in
            # stream mode, then the actual image payloads are downloaded in
            # the asynchronous callbacks.  when this occurs we have no
            # choice but to re-request the image from the beginning (easier
            # than playing around with range headers).  this means each
            # thread may issue multiple new requests.  I have found the
            # performance overhead to be mostly negligible.
            except requests.exceptions.ChunkedEncodingError:
                if not page_url:
                    output.error(
                        "Connection killed on page {} but scraper does not "
                        "support retries".format(page_num))
                    raise exceptions.ScrapingError
                output.warning(
                    "Connection killed on page {}, {} retries remaining"
                    .format(page_num, retries))
                retries -= 1
                if retries <= 0:
                    output.error(
                        "Connection killed on page {}, no retries remaining "
                        "- aborting chapter".format(page_num))
                    raise exceptions.ScrapingError
                r = self.req_session.get(page_url, stream=True)
        f.flush()
        f.close()
        r.close()
        return (page_num, f)
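The non-sequential parallel usage the docstring describes might look like this sketch (the thread pool and the `page_urls` list are assumptions mirroring Example #9, not the scraper's actual code):

        pool = concurrent.futures.ThreadPoolExecutor(
            config.get().download_threads)
        futures = [pool.submit(page_download_task, num,
                               self.req_session.get(url, stream=True),
                               page_url=url)
                   for num, url in enumerate(page_urls, start=1)]
        # completion order is arbitrary, so results are re-sorted by page
        # number before the chapter is stitched together
        pages = sorted(fut.result()
                       for fut in concurrent.futures.as_completed(futures))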
Example #6
def get(input, directory):
    """Download chapters by URL or by alias:chapter.

    The command accepts input as either the URL of the chapter, the alias of
    a followed series, or an alias:chapter combination (e.g. 'bakuon:11') if
    the chapter is already in the database through a follow. Downloads by URL
    are not entered in the database, and downloads by alias:chapter ignore
    the downloaded status, so the command can be used both to download
    one-shots that don't require follows and to redownload already-downloaded
    chapters.
    """
    chapter_list = []
    for item in input:
        series = None
        try:
            series = utility.series_by_url(item)
        except exceptions.ScrapingError:
            pass
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if series:
            chapter_list += series.chapters
        chapter = None
        try:
            chapter = utility.chapter_by_url(item)
        except exceptions.ScrapingError:
            pass
        except exceptions.LoginError as e:
            output.warning('{} ({})'.format(e.message, item))
            continue
        if chapter:
            chapter_list.append(chapter)
        if not (series or chapter):
            chapters = db.session.query(db.Chapter).join(db.Series)
            try:
                alias, chapter = item.split(':')
                chapters = chapters.filter(db.Series.alias == alias,
                                           db.Chapter.chapter == chapter)
            except ValueError:
                chapters = chapters.filter(db.Series.alias == item)
            chapters = chapters.all()
            if not chapters:
                output.warning('Invalid selection "{}"'.format(item))
            for chapter in chapters:
                chapter_list.append(chapter.to_object())
        if series:
            del series
    for chapter in chapter_list:
        chapter.directory = directory
        try:
            chapter.get(use_db=False)
        except exceptions.LoginError as e:
            output.warning('Could not download {c.alias} {c.chapter}: {e}'
                           .format(c=chapter, e=e.message))
        del chapter
Example #7
def _translate_chapter_id(chapter_id):
    """Maps a numeric legacy chapter id to its new UUID via the
    /legacy/mapping endpoint. Non-numeric ids are assumed to already be
    new-style and are returned unchanged.
    """
    try:
        legacy_chapter_id = int(chapter_id)
        if debug:
            output.warning(
                "Mangadex API: querying legacy chapter {} -> /legacy/mapping"
                .format(legacy_chapter_id))
        r = requests.post("https://api.mangadex.org/legacy/mapping",
                          json={
                              "type": "chapter",
                              "ids": [legacy_chapter_id]
                          })
        try:
            return r.json()["data"][0]["attributes"]["newId"]
        except (KeyError, IndexError):
            # the endpoint returned no mapping for this id
            return "invalid"
    except ValueError:
        # int() failed: not a legacy numeric id, pass it through as-is
        return chapter_id
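A hedged sketch of the two input paths (both ids are invented for illustration):

_translate_chapter_id('123456')    # legacy numeric id: POSTs to
                                   # /legacy/mapping and returns the new UUID
                                   # (or 'invalid' if the mapping is missing)
_translate_chapter_id('0be2d152-...')    # already a UUID: returned unchanged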
Example #8
def download(aliases):
    """Download all available chapters.

    If one or more aliases are specified, the command will only download new
    chapters for those aliases.
    """
    chapters = []
    if not aliases:
        chapters = db.Chapter.find_new()
    for alias in aliases:
        chapters += db.Chapter.find_new(alias=alias)
    output.chapter('Downloading {} chapters'.format(len(chapters)))
    for chapter in chapters:
        try:
            chapter.get()
        except exceptions.LoginError as e:
            output.warning('Could not download {c.alias} {c.chapter}: {e}'
                           .format(c=chapter, e=e.message))
        except exceptions.ScrapingError:
            pass
Example #9
def update(fast):
    """Gather new chapters from followed series."""
    pool = concurrent.futures.ThreadPoolExecutor(config.get().download_threads)
    futures = []
    warnings = []
    aliases = {}
    query = db.session.query(db.Series).filter_by(following=True).all()
    if fast:
        skip_count = 0
        for series in query.copy():
            if not series.needs_update:
                skip_count += 1
                query.remove(series)
        output.series('Updating {} series ({} skipped)'
                      .format(len(query), skip_count))
    else:
        output.series('Updating {} series'.format(len(query)))
    for follow in query:
        fut = pool.submit(utility.series_by_url, follow.url)
        futures.append(fut)
        aliases[fut] = follow.alias
    with click.progressbar(length=len(futures), show_pos=True,
                           fill_char='>', empty_char=' ') as bar:
        for future in concurrent.futures.as_completed(futures):
            try:
                series = future.result()
            except exceptions.ConnectionError:
                warnings.append('Unable to update {} (connection error)'
                                .format(aliases[future]))
            except exceptions.ScrapingError:
                warnings.append('Unable to update {} (scraping error)'
                                .format(aliases[future]))
            except exceptions.LoginError as e:
                warnings.append('Unable to update {} ({})'
                                .format(aliases[future], e.message))
            else:
                series.update()
            bar.update(1)
    for w in warnings:
        output.warning(w)
    utility.list_new()
Example #10
def page_download_task(page_num, r, page_url=None):
    """Saves the response body of a single request and reports the
    download outcome to the MangaDex@Home network.
    """
    ext = BaseChapter.guess_extension(r.headers.get("content-type"))
    f = NamedTemporaryFile(suffix=ext, delete=False)
    download_start_time = int(time.time())
    try:
        for chunk in r.iter_content(chunk_size=4096):
            if chunk:
                f.write(chunk)
    except ConnectionError:
        f.flush()
        # page failed to download, send failure report
        if debug:
            output.warning("Mangadex API: send failure report")
        requests.post("https://api.mangadex.network/report",
                      data={
                          "url": page_url,
                          "success": False,
                          "bytes": f.tell(),
                          "duration": int(time.time()) - download_start_time,
                          "cached": bool(r.headers.get("X-Cache"))
                      })
        raise exceptions.ScrapingError
    f.flush()
    # page download successful, send success report
    if debug:
        output.warning("Mangadex API: send success report")
    requests.post("https://api.mangadex.network/report",
                  data={
                      "url": page_url,
                      "success": True,
                      "bytes": f.tell(),
                      "duration": int(time.time()) - download_start_time,
                      "cached": bool(r.headers.get("X-Cache"))
                  })
    f.close()
    r.close()
    return (page_num, f)
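The failure and success branches post nearly identical payloads; a helper along these lines could factor the report out (a sketch, assuming the surrounding names stay as above):

def _send_report(page_url, r, f, success, start_time):
    # report one page download outcome to the MangaDex@Home network
    requests.post("https://api.mangadex.network/report",
                  data={
                      "url": page_url,
                      "success": success,
                      "bytes": f.tell(),
                      "duration": int(time.time()) - start_time,
                      "cached": bool(r.headers.get("X-Cache"))
                  })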
Example #11
    def follow(self, ignore=False):
        """Adds the series details to database and all current chapters."""

        try:
            s = db.session.query(db.Series).filter_by(url=self.url).one()
        except NoResultFound:
            s = db.Series(self)
            s.check_alias_uniqueness()
            output.series('Adding follow for {s.name} ({s.alias})'.format(s=s))
            db.session.add(s)
            db.session.commit()
        else:
            if s.following:
                output.warning(
                    'You are already following {s.name} ({s.alias})'.format(
                        s=s))
            else:
                s.directory = self.directory
                s.following = True
                db.session.commit()

        for chapter in self.chapters:
            chapter.save(s, ignore=ignore)