def parse_site(self, url):
    """Scrape a Starkana series page and populate self.chapters.

    Fills self.chapters with (chapter_url, title, number) tuples and
    self.chapters_to_download with the indices the user (or auto mode)
    selected.

    Raises:
        MangaNotFound: the page yielded no chapters.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source = get_source_code(url, self.options.proxy)
    self.chapters = Starkana.re_get_chapters.findall(source)
    # Site lists newest-first; we want oldest-first.
    self.chapters.reverse()
    if not self.chapters:
        raise self.MangaNotFound
    lower_range = 0
    for i in range(0, len(self.chapters)):
        # BUG FIX: the URL was assembled as path + base_url, which yields
        # garbage like "/chapter/1http://...". The base URL must come first,
        # matching every other site plugin in this file.
        # (Assumes regex group 0 is the relative chapter path — consistent
        # with the MangaReader/EatManga plugins; confirm against the regex.)
        self.chapters[i] = ('%s%s' % (self.base_url, self.chapters[i][0]),
                            self.chapters[i][2],
                            self.chapters[i][2])
        if not self.options.auto:
            print('(%i) %s' % (i + 1, self.chapters[i][1]))
        else:
            # Remember where to resume from in auto mode.
            if self.options.lastDownloaded == self.chapters[i][1]:
                lower_range = i + 1
    upper_range = len(self.chapters)
    if not self.options.auto:
        self.chapters_to_download = self.select_chapters(self.chapters)
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def parse_site(self, url):
    """Scrape a MangaReader series page and populate self.chapters.

    Each entry becomes (absolute_url, display_title, chapter_number).
    In auto mode, remembers the position just past lastDownloaded;
    otherwise prints a numbered menu and asks the user to pick.

    Raises:
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source = get_source_code(url, self.options.proxy)
    self.chapters = MangaReader.re_get_chapters.findall(source)
    lower_range = 0
    for idx in range(len(self.chapters)):
        raw = self.chapters[idx]
        # Strip the series name off the title to get the bare chapter number.
        number = raw[1].replace(self.options.manga, '').strip()
        entry = ('%s%s' % (self.base_url, raw[0]),
                 '%s%s' % (number, raw[2]),
                 number)
        self.chapters[idx] = entry
        if self.options.auto:
            # Resume just past the last chapter we already downloaded.
            if self.options.lastDownloaded == entry[1].decode('utf-8'):
                lower_range = idx + 1
        else:
            print('(%i) %s' % (idx + 1, entry[1]))
    upper_range = len(self.chapters)
    if self.options.auto:
        if lower_range == upper_range:
            raise self.NoUpdates
        self.chapters_to_download.extend(range(lower_range, upper_range))
    else:
        self.chapters_to_download = self.select_chapters(self.chapters)
    return
def get_manga_url(self):
    """Resolve the series URL via MangaReader's alphabetical index page."""
    listing_url = '%s/alphabetical' % self.base_url
    source = get_source_code(listing_url, self.options.proxy)
    # Only consider markup from the 'series_col' marker onward, so the
    # regex doesn't match navigation links above the series listing.
    listing = source[source.find('series_col'):]
    all_series = MangaReader.re_get_series.findall(listing)
    keyword = self.select_from_results(all_series)
    return '%s%s' % (self.base_url, keyword)
def download_chapter(self, max_pages, url, manga_chapter_prefix, current_chapter):
    """We ignore max_pages, because you can't regex-search that under Batoto.

    Page URLs come from the chapter page's <select id="page_select">
    dropdown; each <option>'s value is the page URL.
    """
    markup = get_source_code(url, self.options.proxy)
    page_options = BeautifulSoup(markup).find("select", id="page_select")("option")
    for page_number, option in enumerate(page_options, start=1):
        self.download_image(page_number, option['value'], manga_chapter_prefix,
                            max_pages, current_chapter)
def download_chapter(self, max_pages, url, manga_chapter_prefix, current_chapter):
    """Download every page of one EatManga chapter."""
    source = get_source_code(url, self.options.proxy)
    matches = EatManga.re_get_page.findall(source)
    # The page can repeat links; OrderedDict.fromkeys de-duplicates while
    # preserving first-seen order.
    for match in list(OrderedDict.fromkeys(matches)):
        absolute_url = 'http://eatmanga.com%s' % match[0]
        self.download_image(match[1], absolute_url, manga_chapter_prefix,
                            max_pages, current_chapter)
def get_manga_url(self):
    """Search Batoto for self.options.manga and return the chosen series URL.

    Parses the search-results table; each usable row yields a
    (url, utf8_title) pair that is offered to the user via
    select_from_results().

    Raises:
        MangaNotFound: the search returned no parseable results.
    """
    url = "{}/search?name={}&name_cond=c".format(self.base_url,
                                                 '+'.join(self.options.manga.split()))
    s = get_source_code(url, self.options.proxy)
    soup = BeautifulSoup(s)
    a = soup.find("div", id="comic_search_results")
    r = a.tbody.find_all("tr")[1:]  # skip the header row
    seriesl = []
    for i in r:
        # Best-effort row parse: rows that don't match the expected markup
        # (missing anchor/image, absent href) are skipped.
        # BUG FIX: was a bare `except: pass`, which under Python 2 also
        # swallows KeyboardInterrupt/SystemExit and hides real errors;
        # narrowed to the failures this lookup chain can actually produce.
        try:
            e = i.td.findAll('a')[1]
            u = e['href']
            t = e.img.next_sibling[1:]
            seriesl.append((u, t.encode('utf-8')))
        except (AttributeError, TypeError, IndexError, KeyError):
            continue
    if not seriesl:
        # signifies no manga found
        raise self.MangaNotFound("Nonexistent.")
    url = self.select_from_results(seriesl)
    return url
def parse_site(self, url):
    """Scrape an EatManga series page and populate self.chapters.

    Entries not yet released (URL contains 'upcoming') are discarded.
    Each remaining entry becomes (absolute_url, number, number).

    Raises:
        MangaNotFound: the page yielded no chapters.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source = get_source_code(url, self.options.proxy)
    self.chapters = EatManga.re_get_chapters.findall(source)
    # Site lists newest-first; we want oldest-first.
    self.chapters.reverse()
    if not self.chapters:
        raise self.MangaNotFound
    # BUG FIX: the old code did `del self.chapters[i]` inside a
    # `for i in range(0, len(self.chapters))` loop. Deleting shifts the
    # following element into slot i (which `continue` then skipped), and the
    # precomputed range eventually indexed past the shortened list,
    # raising IndexError. Filter the not-yet-available chapters up front.
    self.chapters = [ch for ch in self.chapters if 'upcoming' not in ch[0]]
    lower_range = 0
    for i in range(0, len(self.chapters)):
        # Strip the series name off the title to get the bare chapter number.
        chapter_number = self.chapters[i][2].replace(self.options.manga, '').strip()
        self.chapters[i] = ('%s%s' % (self.base_url, self.chapters[i][0]),
                            chapter_number,
                            chapter_number)
        if not self.options.auto:
            print('(%i) %s' % (i + 1, self.chapters[i][1]))
        else:
            # Remember where to resume from in auto mode.
            if self.options.lastDownloaded == self.chapters[i][1]:
                lower_range = i + 1
    upper_range = len(self.chapters)
    if not self.options.auto:
        self.chapters_to_download = self.select_chapters(self.chapters)
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def download_chapter(self, max_pages, url, manga_chapter_prefix, current_chapter):
    """Download every page of one MangaReader chapter."""
    source = get_source_code(url, self.options.proxy)
    matches = MangaReader.re_get_page.findall(source)
    for match in matches:
        # match[0] is the relative page path, match[1] the page number.
        absolute_url = 'http://www.mangareader.net' + match[0]
        self.download_image(match[1], absolute_url, manga_chapter_prefix,
                            max_pages, current_chapter)
def parse_site(self, url):
    """Locate the MangaHere series page and populate self.chapters.

    Tries *url* (a guessed series URL) first; if that page is missing,
    falls back to the site search (a begins-with pass, then a contains
    pass). Fills self.chapters with (chapter_url, title, number) tuples
    and self.chapters_to_download with the selected indices.

    Raises:
        MangaNotFound: the series cannot be resolved via search.
        MangaLicenced: the series has been removed from the site.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source = get_source_code(url, self.options.proxy)
    if source is None or 'the page you have requested can' in source:
        # Guess failed — do a 'begins-with' search, then a 'contains' search
        url = '%s/search.php?name=%s' % (self.base_url, '+'.join(self.options.manga.split()))
        try:
            source = get_source_code(url, self.options.proxy)
            # The site rate-limits searches; back off once and retry below.
            if 'Sorry you have just searched, please try 5 seconds later.' in source:
                print('Searched too soon, waiting 5 seconds...')
                time.sleep(5)
            series_results = []
            if source is not None:
                series_results = MangaHere.re_get_series.findall(source)
            if 0 == len(series_results):
                # Retry the same search (covers the rate-limited case above).
                url = '%s/search.php?name=%s' % (self.base_url, '+'.join(self.options.manga.split()))
                source = get_source_code(url, self.options.proxy)
                if source is not None:
                    series_results = MangaHere.re_get_series.findall(source)
        # 0 results
        except AttributeError:
            raise self.MangaNotFound('It doesn\'t exist, or cannot be resolved by autocorrect.')
        else:
            keyword = self.select_from_results(series_results)
            url = '%s/manga/%s/' % (self.base_url, keyword)
            source = get_source_code(url, self.options.proxy)
    else:
        # The Guess worked
        keyword = fix_formatting(self.options.manga, '_', remove_special_chars=True, lower_case=True, use_ignore_chars=False)
    # other check for manga removal if our initial guess for the name was wrong
    if 'it is not available in' in source or "It's not available in" in source:
        raise self.MangaLicenced('It has been removed.')
    # that's nice of them
    # url = 'http://www.mangahere.com/cache/manga/%s/chapters.js' % keyword
    # source = getSourceCode(url, self.proxy)
    # chapters is a 2-tuple
    # chapters[0] contains the chapter URL
    # chapters[1] contains the chapter title
    is_chapter_only = False
    # can't pre-compile this because relies on class name
    # First try the volume+chapter URL scheme (/vNN/cNN/)...
    re_get_chapters = re.compile(
        'a.*?href="http://.*?mangahere.*?/manga/%s/(v[\d]+)/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
    self.chapters = re_get_chapters.findall(source)
    if not self.chapters:
        # ...then fall back to the chapter-only scheme (/cNN/).
        is_chapter_only = True
        re_get_chapters = re.compile(
            'a.*?href="http://.*?mangahere.*?/manga/%s/(c[\d]+(\.[\d]+)?)/[^"]*?"' % keyword)
        self.chapters = re_get_chapters.findall(source)
    # Sort chapters by volume and chapter number. Needed because next chapter isn't always accurate.
    self.chapters = sorted(self.chapters, cmp=self.chapter_compare)
    lower_range = 0
    if is_chapter_only:
        for i in range(0, len(self.chapters)):
            if self.options.auto:
                # Resume just past the last chapter we already downloaded.
                if self.options.lastDownloaded == self.chapters[i][0]:
                    lower_range = i + 1
            # Keep only the digits/dots of e.g. 'c012.5' for the number field.
            ch_number = self.re_non_decimal.sub('', self.chapters[i][0])
            self.chapters[i] = (
                '%s/manga/%s/%s' % (self.base_url, keyword, self.chapters[i][0]),
                self.chapters[i][0],
                ch_number)
    else:
        for i in range(0, len(self.chapters)):
            ch_number = self.re_non_decimal.sub('', self.chapters[i][1])
            # Title is 'vNN.cNN'; URL includes both volume and chapter parts.
            self.chapters[i] = (
                '%s/manga/%s/%s/%s' % (self.base_url, keyword, self.chapters[i][0], self.chapters[i][1]),
                self.chapters[i][0] + "." + self.chapters[i][1],
                ch_number)
            if self.options.auto:
                if self.options.lastDownloaded == self.chapters[i][1]:
                    lower_range = i + 1
    upper_range = len(self.chapters)
    # Validate whether the last chapter is available
    source = get_source_code(self.chapters[upper_range - 1][0], self.options.proxy)
    if ('not available yet' in source) or ('Sorry, the page you have requested can’t be found' in source):
        # If the last chapter is not available remove it from the list
        del self.chapters[upper_range - 1]
        upper_range -= 1
    # which ones do we want?
    if not self.options.auto:
        for i in range(0, upper_range):
            if is_chapter_only:
                print('(%i) %s' % (i + 1, self.chapters[i][0]))
            else:
                print('(%i) %s' % (i + 1, self.chapters[i][1]))
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def parse_site(self, url):
    """Locate the MangaFox series page and populate self.chapters.

    Tries *url* (a guessed series URL) first, detecting redirects; if that
    fails, falls back to the site search (a begins-with pass, then a
    contains pass). Fills self.chapters with (chapter_url, title, number)
    tuples and self.chapters_to_download with the selected indices.

    Raises:
        MangaNotFound: the series cannot be resolved, or was removed.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    source, redirect_url = get_source_code(url, self.options.proxy, True)
    # A redirect means the guessed URL didn't hit the series page directly.
    if redirect_url != url or source is None or "the page you have requested cannot be found" in source:
        # Could not find the manga page by guessing
        # Use the website search
        # name_method=bw is a 'begins-with' search.
        url = "%s/search.php?name_method=bw&name=%s&is_completed=&advopts=1" % (
            self.base_url,
            "+".join(self.options.manga.split()),
        )
        if self.options.verbose_FLAG:
            print(url)
        try:
            source = get_source_code(url, self.options.proxy)
            series_results = []
            if source is not None:
                series_results = MangaFox.re_get_series.findall(source)
            if 0 == len(series_results):
                # name_method=cw is the broader 'contains' search.
                url = "%s/search.php?name_method=cw&name=%s&is_completed=&advopts=1" % (
                    self.base_url,
                    "+".join(self.options.manga.split()),
                )
                if self.options.verbose_FLAG:
                    print(url)
                source = get_source_code(url, self.options.proxy)
                if source is not None:
                    series_results = MangaFox.re_get_series.findall(source)
        # 0 results
        except AttributeError:
            raise self.MangaNotFound("It doesn't exist, or cannot be resolved by autocorrect.")
        else:
            keyword = self.select_from_results(series_results)
            if self.options.verbose_FLAG:
                print("Keyword: %s" % keyword)
            # NOTE: base_url is used as a format string here (contains a %s
            # placeholder for the series keyword).
            url = self.base_url % keyword
            if self.options.verbose_FLAG:
                print("URL: %s" % url)
            source = get_source_code(url, self.options.proxy)
            if source is None:
                raise self.MangaNotFound("Search Failed to find Manga.")
    else:
        # The Guess worked
        keyword = fix_formatting(self.options.manga)
        if self.options.verbose_FLAG:
            print("Keyword: %s" % keyword)
    if "it is not available in Manga Fox." in source:
        raise self.MangaNotFound("It has been removed.")
    # that's nice of them
    # url = 'http://mangafox.me/cache/manga/%s/chapters.js' % keyword
    # source = getSourceCode(url, self.proxy)
    # chapters is a 2-tuple
    # chapters[0] contains the chapter URL
    # chapters[1] contains the chapter title
    is_chapter_only = False
    # can't pre-compile this because relies on class name
    # First try the volume+chapter URL scheme (/vNN/cNN/)...
    re_get_chapters = re.compile('a href="http://.*?mangafox.*?/manga/%s/(v[\d]+)/(c[\d]+)/[^"]*?" title' % keyword)
    self.chapters = re_get_chapters.findall(source)
    if not self.chapters:
        if self.options.verbose_FLAG:
            print("Trying chapter only regex")
        # ...then fall back to the chapter-only scheme (/cNN/).
        is_chapter_only = True
        re_get_chapters = re.compile('a href="http://.*?mangafox.*?/manga/%s/(c[\d]+)/[^"]*?" title' % keyword)
        self.chapters = re_get_chapters.findall(source)
    # Site lists newest-first; we want oldest-first.
    self.chapters.reverse()
    lower_range = 0
    if is_chapter_only:
        for i in range(0, len(self.chapters)):
            if self.options.verbose_FLAG:
                print("%s" % self.chapters[i])
            if not self.options.auto:
                print("(%i) %s" % (i + 1, self.chapters[i]))
            else:
                # Resume just past the last chapter we already downloaded.
                if self.options.lastDownloaded == self.chapters[i]:
                    lower_range = i + 1
            self.chapters[i] = (
                "%s/manga/%s/%s" % (self.base_url, keyword, self.chapters[i]),
                self.chapters[i],
                self.chapters[i],
            )
    else:
        for i in range(0, len(self.chapters)):
            if self.options.verbose_FLAG:
                print("%s %s" % (self.chapters[i][0], self.chapters[i][1]))
            # Title is 'vNN.cNN'; URL includes both volume and chapter parts.
            self.chapters[i] = (
                "%s/manga/%s/%s/%s" % (self.base_url, keyword, self.chapters[i][0], self.chapters[i][1]),
                self.chapters[i][0] + "." + self.chapters[i][1],
                self.chapters[i][1],
            )
            if not self.options.auto:
                print("(%i) %s" % (i + 1, self.chapters[i][1]))
            else:
                if self.options.lastDownloaded == self.chapters[i][1]:
                    lower_range = i + 1
    upper_range = len(self.chapters)
    # which ones do we want?
    if not self.options.auto:
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def parse_site(self, url):
    """Scrape a Batoto series page and populate self.chapters.

    Batoto lists multiple scanlation groups' releases of the same chapter;
    this method buckets rows into per-chapter lists, then keeps exactly one
    release per chapter, preferring continuity with the previously chosen
    release and falling back to the site's own 'Next Chapter' link when the
    choice is ambiguous.

    Raises:
        MangaLicenced: no English chapters were found on the page.
        NoUpdates: auto mode found nothing newer than lastDownloaded.
    """
    s = get_source_code(url, self.options.proxy)
    soup = BeautifulSoup(s)
    t = soup.find("table", class_="chapters_list").tbody
    cl = t.find_all("tr", class_="lang_English")
    # self.chapters is (for now) a list of buckets; each bucket collects the
    # releases believed to belong to one chapter.
    self.chapters = [[]]
    cnum = self.chapters[0]
    for i in cl:
        u = i.td.a['href']  # release URL
        # NOTE: rebinds `t` (was the table above) to the release title text.
        t = i.td.a.img.next_sibling[1:]
        g = i.find_all("td")[2].get_text().strip()  # scanlation group name
        try:
            # Chapter number parsed out of the URL; 'ch12.0' normalizes to '12'.
            c = float(re.search("ch([\d.]+)", u).group(1))
            c = str(int(c)) if c.is_integer() else str(c)
        except AttributeError:
            # URL has no chNN component — treat as chapter '0'.
            c = '0'
        tu = (u, t, c, g)  # indices: 0=url, 1=title, 2=chapter, 3=group
        # NOTE(review): this compares the bucket's group name (index 3) with
        # the new row's chapter number `c`; bucketing by chapter would use
        # index 2. Looks like a possible index mix-up (the selection logic
        # below also uses n[2] where group continuity would suggest n[3]) —
        # confirm against live page data before changing.
        if len(cnum) == 0 or cnum[0][3] == c:
            cnum.append(tu)
        else:
            # Start a new bucket for the next chapter.
            self.chapters.append([])
            cnum = self.chapters[-1]
            cnum.append(tu)
    # Site lists newest-first; we want oldest-first.
    self.chapters.reverse()
    if self.chapters == [[]]:
        raise self.MangaLicenced('It has been removed.')
    # Look for first chapter that should be downloaded in auto mode
    lower_range = 0
    if self.options.auto:
        for i in range(0, len(self.chapters)):
            if self.options.lastDownloaded == self.chapters[i][0][1]:
                lower_range = i + 1
    # Collapse each bucket to a single release. `sc` tracks the release
    # selected for the previous chapter.
    sc = None
    for i in self.chapters:
        if len(i) == 1 or sc is None:
            # Unambiguous (or first) bucket: take its first release.
            sc = i[0]
            del i[1:]
            continue
        # Candidates consistent with the previous selection (by index 2).
        ll = [n for n in i if n[2] == sc[2]]
        if len(ll) != 1:
            # Ambiguous: trust the site's own 'Next Chapter' link from the
            # previously selected release.
            c = self.get_next_url(sc[0])
            i[0] = [n for n in i if n[0] == c][0]
            if self.options.verbose_FLAG:
                print("Anomaly at chapter {} ({} matches, chose {})".format(i[0][3], len(ll), i[0][2]))
            del i[1:]
            sc = i[0]
            continue
        i[0] = ll[0]
        sc = i[0]
        del i[1:]
    # Flatten: one chosen release tuple per chapter.
    self.chapters = [i[0] for i in self.chapters]
    upper_range = len(self.chapters)
    # which ones do we want?
    if not self.options.auto:
        for n, c in enumerate(self.chapters):
            print("{:03d}. {}".format(n + 1, c[1].encode('utf-8')))
        self.chapters_to_download = self.select_chapters(self.chapters)
    # XML component
    else:
        if lower_range == upper_range:
            raise self.NoUpdates
        for i in range(lower_range, upper_range):
            self.chapters_to_download.append(i)
    return
def get_next_url(self, c):
    """Return the href of the 'Next Chapter' link on chapter page *c*."""
    markup = get_source_code(c, self.options.proxy)
    # The next-chapter arrow is an <img title="Next Chapter"> wrapped in
    # the anchor we want.
    anchor = BeautifulSoup(markup).find("img", title="Next Chapter").parent
    return anchor['href']