Example #1
File: parser.py Project: TGbusWD/nhentai
def search_parser(keyword, page):
    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
    result = []
    try:
        response = request('get',
                           url=constant.SEARCH_URL,
                           params={
                               'q': keyword,
                               'page': page
                           }).content
    except requests.ConnectionError as e:
        logger.critical(e)
        logger.warning(
            'If you are in China, please configure the proxy to bypass the GFW.')
        raise SystemExit

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_search_result = html.find_all('div', attrs={'class': 'gallery'})
    for doujinshi in doujinshi_search_result:
        doujinshi_container = doujinshi.find('div', attrs={'class': 'caption'})
        title = doujinshi_container.text.strip()
        title = title if len(title) < 85 else title[:82] + '...'
        id_ = re.search(r'/g/(\d+)/', doujinshi.a['href']).group(1)
        result.append({'id': id_, 'title': title})
    if not result:
        logger.warning('No results found for keyword {}'.format(keyword))

    return result
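
A note on context: the snippets in this listing assume a handful of module-level imports that the aggregator stripped. A plausible header for the parser examples, as a sketch — the exact module paths are assumptions about the nhentai project layout:

import os
import re

import requests
from bs4 import BeautifulSoup

from nhentai import constant            # SEARCH_URL, DETAIL_URL, IMAGE_URL, TAG_URL, ...
from nhentai.logger import logger       # project logger; snippets use a custom verbosity level 15
from nhentai.utils import request       # thin wrapper around requests with proxy/cookie handling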
Example #2
    def _download(self, url, folder='', filename='', retried=0):
        logger.info('Start downloading: {0} ...'.format(url))
        filename = filename if filename else os.path.basename(urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)
        try:
            with open(os.path.join(folder, base_filename.zfill(3) + extension), "wb") as f:
                response = request('get', url, stream=True, timeout=self.timeout)
                if response.status_code != 200:
                    raise NhentaiImageNotExistException
                length = response.headers.get('content-length')
                if length is None:
                    f.write(response.content)
                else:
                    for chunk in response.iter_content(2048):
                        f.write(chunk)

        except (requests.HTTPError, requests.Timeout) as e:
            if retried < 3:
                logger.warning('Warning: {0}, retrying({1}) ...'.format(str(e), retried))
                return 0, self._download(url=url, folder=folder, filename=filename, retried=retried+1)
            else:
                return 0, None

        except NhentaiImageNotExistException as e:
            os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
            return -1, url

        except Exception as e:
            logger.critical(str(e))
            return 0, None

        return 1, url
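
NhentaiImageNotExistException is a project-specific exception used to signal a missing image. A minimal definition consistent with how the snippet raises and catches it (the real class may live elsewhere in the project):

class NhentaiImageNotExistException(Exception):
    """Raised when an image URL returns a non-200 status code."""
    pass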
Example #3
    def download(self):
        logger.info('Starting to download doujinshi: %s' % self.name)
        if self.downloader:
            download_queue = []

            if len(self.ext) != self.pages:
                logger.warning('Page count and extension count are not equal')

            for i in range(1, min(self.pages, len(self.ext)) + 1):
                download_queue.append(
                    '%s/%d/%d.%s' %
                    (IMAGE_URL, int(self.img_id), i, self.ext[i - 1]))

            self.downloader.download(download_queue, self.filename)

            with open(os.path.join(self.path, self.filename, 'ComicInfo.xml'),
                      "w") as f:
                f.write(self.comicinfoXML)
            '''
            for i in range(len(self.ext)):
                download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i+1, EXT_MAP[self.ext[i]]))
            '''

        else:
            logger.critical('Downloader has not been loaded')
Example #4
    def download(self, queue, folder='', regenerate_cbz=False):
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if os.path.exists(folder + '.cbz'):
            if not regenerate_cbz:
                logger.warning('CBZ file \'{}.cbz\' exists, ignored download request'.format(folder))
                return

        if not os.path.exists(folder):
            logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))

        else:
            logger.warning('Path \'{0}\' already exists.'.format(folder))

        queue = [(self, url, folder, constant.CONFIG['proxy']) for url in queue]

        pool = multiprocessing.Pool(self.size, init_worker)
        [pool.apply_async(download_wrapper, args=item) for item in queue]

        pool.close()
        pool.join()
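
The pool-based download methods reference text, init_worker, and download_wrapper, none of which are shown. A minimal sketch of what they might look like; the signatures are assumptions inferred from how the queue tuples are built above:

import signal
import sys

# Python 2/3 compatibility alias used by the isinstance(folder, text) checks (assumed).
text = str if sys.version_info >= (3, 0) else unicode  # noqa: F821

def init_worker():
    # Pool workers ignore SIGINT so Ctrl-C is handled once, in the parent process.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

def download_wrapper(obj, url, folder='', proxy=None):
    # Unpack a queue tuple and delegate to the downloader instance.
    return obj._download(url, folder=folder)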
Example #5
def doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}'.format(constant.DETAIL_URL, id_)

    try:
        response = request('get', url).json()
    except Exception as e:
        logger.critical(str(e))
        exit(1)

    # the Python 2-era str(...encode('utf-8'))[2:] hack leaves a trailing quote; use the title directly
    doujinshi['name'] = response['title']['english']
    doujinshi['subtitle'] = response['title']['japanese']
    doujinshi['img_id'] = response['media_id']
    doujinshi['ext'] = ''.join(map(lambda s: s['t'], response['images']['pages']))
    doujinshi['pages'] = len(response['images']['pages'])

    # gather tag information for the doujinshi
    needed_fields = ['character', 'artist', 'language']
    for tag in response['tags']:
        tag_type = tag['type']
        if tag_type in needed_fields:
            if tag_type not in doujinshi:
                doujinshi[tag_type] = tag['name']
            else:
                doujinshi[tag_type] += ', ' + tag['name']  # join multiple values with a separator

    return doujinshi
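
A quick usage sketch for the API-based parser above (the gallery id is illustrative):

info = doujinshi_parser(123456)
print('{0} ({1} pages, media id {2})'.format(info['name'], info['pages'], info['img_id']))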
Example #6
    def download(self, queue, folder=''):
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if not os.path.exists(folder):
            logger.warning(
                'Path \'{0}\' does not exist, creating.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))

        else:
            logger.warning('Path \'{0}\' already exists.'.format(folder))

        queue = [(self, url, folder) for url in queue]

        pool = multiprocessing.Pool(self.size, init_worker)
        [pool.apply_async(download_wrapper, args=item) for item in queue]

        pool.close()
        pool.join()
Example #7
 def _download(self, url, folder='', filename='', retried=False):
     logger.info('Start downloading: {0} ...'.format(url))
     filename = filename if filename else os.path.basename(
         urlparse(url).path)
     base_filename, extension = os.path.splitext(filename)
     try:
         with open(os.path.join(folder,
                                base_filename.zfill(3) + extension),
                   "wb") as f:
             response = request('get',
                                url,
                                stream=True,
                                timeout=self.timeout)
             length = response.headers.get('content-length')
             if length is None:
                 f.write(response.content)
             else:
                 for chunk in response.iter_content(2048):
                     f.write(chunk)
     except requests.HTTPError as e:
         if not retried:
             logger.error('Error: {0}, retrying'.format(str(e)))
             return self._download(url=url,
                                   folder=folder,
                                   filename=filename,
                                   retried=True)
         else:
             return None
     except Exception as e:
         logger.critical(str(e))
         return None
     return url
Example #8
    def _download(self, url, folder='', filename='', retried=0):
        logger.info('Starting to download {0} ...'.format(url))
        filename = filename if filename else os.path.basename(
            urlparse(url).path)
        base_filename, extension = os.path.splitext(filename)
        try:
            if os.path.exists(
                    os.path.join(folder,
                                 base_filename.zfill(3) + extension)):
                logger.warning('File: {0} exists, ignoring'.format(
                    os.path.join(folder,
                                 base_filename.zfill(3) + extension)))
                return 1, url

            with open(os.path.join(folder,
                                   base_filename.zfill(3) + extension),
                      "wb") as f:
                i = 0
                while i < 10:
                    try:
                        response = request('get',
                                           url,
                                           stream=True,
                                           timeout=self.timeout)
                    except Exception as e:
                        i += 1
                        if not i < 10:
                            logger.critical(str(e))
                            return 0, None
                        continue
                    break
                if response.status_code != 200:
                    raise NhentaiImageNotExistException
                length = response.headers.get('content-length')
                if length is None:
                    f.write(response.content)
                else:
                    for chunk in response.iter_content(2048):
                        f.write(chunk)

        except (requests.HTTPError, requests.Timeout) as e:
            if retried < 3:
                logger.warning('Warning: {0}, retrying({1}) ...'.format(
                    str(e), retried))
                return 0, self._download(url=url,
                                         folder=folder,
                                         filename=filename,
                                         retried=retried + 1)
            else:
                return 0, None

        except NhentaiImageNotExistException as e:
            os.remove(os.path.join(folder, base_filename.zfill(3) + extension))
            return -1, url

        except Exception as e:
            logger.critical(str(e))
            return 0, None

        return 1, url
Example #9
def __api_suspended_search_parser(keyword, page):
    logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
    result = []
    i = 0
    while i < 5:
        try:
            response = request('get',
                               url=constant.SEARCH_URL,
                               params={
                                   'query': keyword,
                                   'page': page
                               }).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                logger.warning(
                    'If you are in China, please configure the proxy to bypass the GFW.'
                )
                exit(1)
            continue
        break

    if 'result' not in response:
        raise Exception('No result in response')

    for row in response['result']:
        title = row['title']['english']
        title = title[:85] + '..' if len(title) > 85 else title
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warning('No results for keywords {}'.format(keyword))

    return result
Example #10
File: parser.py Project: RicterZ/nhentai
def __api_suspended_search_parser(keyword, page):
    logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
    result = []
    i = 0
    while i < 5:
        try:
            response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                logger.warning('If you are in China, please configure the proxy to bypass the GFW.')
                exit(1)
            continue
        break

    if 'result' not in response:
        raise Exception('No result in response')

    for row in response['result']:
        title = row['title']['english']
        title = title[:85] + '..' if len(title) > 85 else title
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warning('No results for keywords {}'.format(keyword))

    return result
Example #11
    def download(self, queue, folder=''):
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if not os.path.exists(folder):
            logger.warning(
                'Path \'{0}\' does not exist, creating.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))
                exit(1)
        else:
            logger.warning('Path \'{0}\' already exists.'.format(folder))

        queue = [([url], {'folder': folder}) for url in queue]

        self.thread_pool = threadpool.ThreadPool(self.thread_count)
        requests_ = threadpool.makeRequests(self._download, queue,
                                            self._download_callback)
        [self.thread_pool.putRequest(req) for req in requests_]

        self.thread_pool.wait()
Example #12
File: parser.py Project: foxfur1/nhentai
def doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}/'.format(constant.DETAIL_URL, id_)

    try:
        response = request('get', url).content
    except Exception as e:
        logger.critical(str(e))
        raise SystemExit

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_info = html.find('div', attrs={'id': 'info'})

    title = doujinshi_info.find('h1').text
    subtitle = doujinshi_info.find('h2')

    doujinshi['name'] = title
    doujinshi['subtitle'] = subtitle.text if subtitle else ''

    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
    img_id = re.search(r'/galleries/([\d]+)/cover\.(jpg|png)$', doujinshi_cover.a.img.attrs['data-src'])

    ext = []
    for i in html.find_all('div', attrs={'class': 'thumb-container'}):
        _, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
        ext.append(ext_name)

    if not img_id:
        logger.critical('Failed to get image id')
        exit(1)

    doujinshi['img_id'] = img_id.group(1)
    doujinshi['ext'] = ext

    pages = 0
    for _ in doujinshi_info.find_all('div', class_=''):
        pages = re.search(r'([\d]+) pages', _.text)
        if pages:
            pages = pages.group(1)
            break
    doujinshi['pages'] = int(pages)

    # gather tag information for the doujinshi
    information_fields = doujinshi_info.find_all('div', attrs={'class': 'field-name'})
    needed_fields = ['Characters', 'Artists', 'Language', 'Tags']
    for field in information_fields:
        field_name = field.contents[0].strip().strip(':')
        if field_name in needed_fields:
            data = [sub_field.contents[0].strip() for sub_field in
                    field.find_all('a', attrs={'class': 'tag'})]
            doujinshi[field_name.lower()] = ', '.join(data)

    return doujinshi
Example #13
File: parser.py Project: RicterZ/nhentai
def doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}/'.format(constant.DETAIL_URL, id_)

    try:
        response = request('get', url).content
    except Exception as e:
        logger.critical(str(e))
        raise SystemExit

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_info = html.find('div', attrs={'id': 'info'})

    title = doujinshi_info.find('h1').text
    subtitle = doujinshi_info.find('h2')

    doujinshi['name'] = title
    doujinshi['subtitle'] = subtitle.text if subtitle else ''

    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
    img_id = re.search(r'/galleries/([\d]+)/cover\.(jpg|png)$', doujinshi_cover.a.img.attrs['data-src'])

    ext = []
    for i in html.find_all('div', attrs={'class': 'thumb-container'}):
        _, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
        ext.append(ext_name)

    if not img_id:
        logger.critical('Failed to get image id')
        exit(1)

    doujinshi['img_id'] = img_id.group(1)
    doujinshi['ext'] = ext

    pages = 0
    for _ in doujinshi_info.find_all('div', class_=''):
        pages = re.search(r'([\d]+) pages', _.text)
        if pages:
            pages = pages.group(1)
            break
    doujinshi['pages'] = int(pages)

    # gather tag information for the doujinshi
    information_fields = doujinshi_info.find_all('div', attrs={'class': 'field-name'})
    needed_fields = ['Characters', 'Artists', 'Language', 'Tags']
    for field in information_fields:
        field_name = field.contents[0].strip().strip(':')
        if field_name in needed_fields:
            data = [sub_field.contents[0].strip() for sub_field in
                    field.find_all('a', attrs={'class': 'tag'})]
            doujinshi[field_name.lower()] = ', '.join(data)

    return doujinshi
Example #14
 def download(self):
     logger.info('Starting to download doujinshi: %s' % self.name)
     if self.downloader:
         download_queue = []
         for i in range(1, self.pages + 1):
             download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
         self.downloader.download(download_queue, self.id)
     else:
         logger.critical('Downloader has not been loaded')
Example #15
 def _download_callback(self, request, result):
     result, data = result
     if result == 0:
         logger.critical('fatal errors occurred, quit.')
         exit(1)
     elif result == -1:
         logger.warning('url {} returned status code 404'.format(data))
     else:
         logger.log(15, '{0} downloaded successfully'.format(data))
Example #16
 def download(self):
     logger.info('Starting to download doujinshi: %s' % self.name)
     if self.downloader:
         download_queue = []
         for i in range(1, self.pages + 1):
             download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext))
         self.downloader.download(download_queue, format_filename('%s-%s' % (self.id, self.name[:200])))
     else:
         logger.critical('Downloader has not been loaded')
Example #17
 def _download_callback(self, request, result):
     result, data = result
     if result == 0:
         logger.critical('fatal errors occurred, quit.')
         exit(1)
     elif result == -1:
         logger.warning('url {} returned status code 404'.format(data))
     else:
         logger.log(15, '{0} downloaded successfully'.format(data))
Example #18
def search_parser(keyword, sorting, page, is_page_all=False):
    # keyword = '+'.join([i.strip().replace(' ', '-').lower() for i in keyword.split(',')])
    result = []
    response = None
    if not page:
        page = [1]

    if is_page_all:
        url = request('get',
                      url=constant.SEARCH_URL,
                      params={
                          'query': keyword
                      }).url
        init_response = request('get', url.replace('%2B', '+')).json()
        page = range(1, init_response['num_pages'] + 1)

    total = '/{0}'.format(page[-1]) if is_page_all else ''
    not_exists_persist = False
    for p in page:
        i = 0

        logger.info(
            'Searching doujinshis using keywords "{0}" on page {1}{2}'.format(
                keyword, p, total))
        while i < 3:
            try:
                url = request('get',
                              url=constant.SEARCH_URL,
                              params={
                                  'query': keyword,
                                  'page': p,
                                  'sort': sorting
                              }).url
                response = request('get', url.replace('%2B', '+')).json()
                break
            except Exception as e:
                # count the failed attempt so the loop actually retries
                logger.critical(str(e))
                response = None
                i += 1

        if response is None or 'result' not in response:
            logger.warning('No result in response on page {}'.format(p))
            if not_exists_persist:
                break
            not_exists_persist = True  # stop after two consecutive empty pages
            continue

        for row in response['result']:
            title = row['title']['english']
            title = title[:85] + '..' if len(title) > 85 else title
            result.append({'id': row['id'], 'title': title})

        not_exists_persist = False
        if not result:
            logger.warning('No results for keywords {}'.format(keyword))

    return result
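
A usage sketch for the paged search above; page takes a list of page numbers and is recomputed internally when is_page_all is set (the keyword is illustrative):

results = search_parser('full color', sorting='recent', page=[1, 2, 3])
all_results = search_parser('full color', sorting='recent', page=None, is_page_all=True)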
Example #19
    def download(self):
        logger.info('Starting to download doujinshi: %s' % self.name)
        if self.downloader:
            download_queue = []
            if len(self.ext) != self.pages:
                logger.warning('Page count and extension count are not equal')

            for i in range(1, min(self.pages, len(self.ext)) + 1):
                download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext[i - 1]))

            self.downloader.download(download_queue, self.filename)
        else:
            logger.critical('Downloader has not been loaded')
Example #20
def generate_html(output_dir='.', doujinshi_obj=None, template='default'):
    image_html = ''

    if doujinshi_obj is not None:
        doujinshi_dir = os.path.join(output_dir, doujinshi_obj.filename)
    else:
        doujinshi_dir = '.'

    if not os.path.exists(doujinshi_dir):
        logger.warning(
            'Path \'{0}\' does not exist, creating.'.format(doujinshi_dir))
        try:
            os.makedirs(doujinshi_dir)
        except EnvironmentError as e:
            logger.critical('{0}'.format(str(e)))

    file_list = os.listdir(doujinshi_dir)
    file_list.sort()

    for image in file_list:
        if os.path.splitext(image)[1] not in ('.jpg', '.png'):
            continue

        image_html += '<img src="{0}" class="image-item"/>\n'\
            .format(image)
    html = readfile('viewer/{}/index.html'.format(template))
    css = readfile('viewer/{}/styles.css'.format(template))
    js = readfile('viewer/{}/scripts.js'.format(template))

    if doujinshi_obj is not None:
        serialize_json(doujinshi_obj, doujinshi_dir)
        name = doujinshi_obj.name
        if sys.version_info < (3, 0):
            name = doujinshi_obj.name.encode('utf-8')
    else:
        name = {'title': 'nHentai HTML Viewer'}

    data = html.format(TITLE=name, IMAGES=image_html, SCRIPTS=js, STYLES=css)
    try:
        if sys.version_info < (3, 0):
            with open(os.path.join(doujinshi_dir, 'index.html'), 'w') as f:
                f.write(data)
        else:
            with open(os.path.join(doujinshi_dir, 'index.html'), 'wb') as f:
                f.write(data.encode('utf-8'))

        logger.log(
            15, 'HTML Viewer has been written to \'{0}\''.format(
                os.path.join(doujinshi_dir, 'index.html')))
    except Exception as e:
        logger.warning('Writing HTML Viewer failed ({})'.format(str(e)))
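
A usage sketch for the viewer generator; without a doujinshi_obj it builds an index.html over the images in the working directory (doujinshi here is an illustrative object with a filename attribute):

generate_html(output_dir='./downloads', doujinshi_obj=doujinshi)
generate_html()  # bare viewer for the current directory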
Example #21
def tag_parser(tag_id, max_page=1):
    logger.info('Searching for doujinshi with tag id {0}'.format(tag_id))
    result = []
    i = 0
    while i < 5:
        try:
            response = request('get',
                               url=constant.TAG_API_URL,
                               params={
                                   'sort': 'popular',
                                   'tag_id': tag_id
                               }).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                exit(1)
            continue
        break
    page = max_page if max_page <= response['num_pages'] else int(
        response['num_pages'])

    for page_num in range(1, page + 1):
        logger.info('Getting page {} ...'.format(page_num))

        if page_num != 1:
            # use a fresh retry counter and pass the page number;
            # the original reused the loop variable and omitted the page parameter
            retries = 0
            while retries < 5:
                try:
                    response = request('get',
                                       url=constant.TAG_API_URL,
                                       params={
                                           'sort': 'popular',
                                           'tag_id': tag_id,
                                           'page': page_num
                                       }).json()
                except Exception as e:
                    retries += 1
                    if not retries < 5:
                        logger.critical(str(e))
                        exit(1)
                    continue
                break

        for row in response['result']:
            title = row['title']['english']
            title = title[:85] + '..' if len(title) > 85 else title
            result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warning('No results for tag id {}'.format(tag_id))

    return result
Example #22
File: parser.py Project: fakegit/nhentai
def __api_suspended_doujinshi_parser(id_):
    if not isinstance(id_,
                      (int, )) and (isinstance(id_,
                                               (str, )) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching information of doujinshi id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}'.format(constant.DETAIL_URL, id_)
    i = 0
    while i < 5:
        try:
            response = request('get', url).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                exit(1)
            continue
        break

    doujinshi['name'] = response['title']['english']
    doujinshi['subtitle'] = response['title']['japanese']
    doujinshi['img_id'] = response['media_id']
    doujinshi['ext'] = ''.join([i['t'] for i in response['images']['pages']])
    doujinshi['pages'] = len(response['images']['pages'])

    # gather tag information for the doujinshi
    needed_fields = [
        'character', 'artist', 'language', 'tag', 'parody', 'group', 'category'
    ]
    for tag in response['tags']:
        tag_type = tag['type']
        if tag_type in needed_fields:
            if tag_type == 'tag':
                if tag_type not in doujinshi:
                    doujinshi[tag_type] = {}

                tag['name'] = tag['name'].replace(' ', '-')
                tag['name'] = tag['name'].lower()
                doujinshi[tag_type][tag['name']] = tag['id']
            elif tag_type not in doujinshi:
                doujinshi[tag_type] = tag['name']
            else:
                doujinshi[tag_type] += ', ' + tag['name']

    return doujinshi
Example #23
File: parser.py Project: RicterZ/nhentai
def __api_suspended_doujinshi_parser(id_):
    if not isinstance(id_, (int,)) and (isinstance(id_, (str,)) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching information of doujinshi id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}'.format(constant.DETAIL_URL, id_)
    i = 0
    while i < 5:
        try:
            response = request('get', url).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                exit(1)
            continue
        break

    doujinshi['name'] = response['title']['english']
    doujinshi['subtitle'] = response['title']['japanese']
    doujinshi['img_id'] = response['media_id']
    doujinshi['ext'] = ''.join(map(lambda s: s['t'], response['images']['pages']))
    doujinshi['pages'] = len(response['images']['pages'])

    # gather tag information for the doujinshi
    needed_fields = ['character', 'artist', 'language', 'tag']
    for tag in response['tags']:
        tag_type = tag['type']
        if tag_type in needed_fields:
            if tag_type == 'tag':
                if tag_type not in doujinshi:
                    doujinshi[tag_type] = {}

                tag['name'] = tag['name'].replace(' ', '-')
                tag['name'] = tag['name'].lower()
                doujinshi[tag_type][tag['name']] = tag['id']
            elif tag_type not in doujinshi:
                doujinshi[tag_type] = tag['name']
            else:
                doujinshi[tag_type] += ', ' + tag['name']

    return doujinshi
Example #24
def search_parser(keyword, page):
    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
    try:
        response = request('get',
                           url=constant.SEARCH_URL,
                           params={
                               'q': keyword,
                               'page': page
                           }).content
    except requests.ConnectionError as e:
        logger.critical(e)
        logger.warning(
            'If you are in China, please configure the proxy to bypass the GFW.')
        raise SystemExit

    result = _get_title_and_id(response)
    if not result:
        logger.warning('No results found for keyword {}'.format(keyword))

    return result
Example #25
    def download(self):
        logger.info('Starting to download doujinshi: %s' % self.name)
        if self.downloader:
            download_queue = []

            if len(self.ext) != self.pages:
                logger.warning('Page count and extension count are not equal')

            for i in range(1, min(self.pages, len(self.ext)) + 1):
                download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i, self.ext[i-1]))

            self.downloader.download(download_queue, self.filename)

            '''
            for i in range(len(self.ext)):
                download_queue.append('%s/%d/%d.%s' % (IMAGE_URL, int(self.img_id), i+1, EXT_MAP[self.ext[i]]))
            '''

        else:
            logger.critical('Downloader has not been loaded')
Example #26
def tag_guessing(tag_name):
    tag_name = tag_name.lower()
    tag_name = tag_name.replace(' ', '-')
    logger.info('Trying to get tag_id of tag \'{0}\''.format(tag_name))
    i = 0
    while i < 5:
        try:
            response = request('get',
                               url='%s/%s' %
                               (constant.TAG_URL, tag_name)).content
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                exit(1)
            continue
        break

    html = BeautifulSoup(response, 'html.parser')
    first_item = html.find('div', attrs={'class': 'gallery'})
    if not first_item:
        logger.error(
            'Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
        return

    doujinshi_id = re.findall(r'(\d+)', first_item.a.attrs['href'])
    if not doujinshi_id:
        logger.error(
            'Cannot find doujinshi id of tag \'{0}\''.format(tag_name))
        return

    ret = doujinshi_parser(doujinshi_id[0])
    if 'tag' in ret and tag_name in ret['tag']:
        tag_id = ret['tag'][tag_name]
        logger.info('Tag id of tag \'{0}\' is {1}'.format(tag_name, tag_id))
    else:
        logger.error(
            'Cannot find tag id of tag \'{0}\''.format(tag_name))
        return

    return tag_id
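
A usage sketch chaining the tag lookup with the tag parser from Example #21 (the tag name is illustrative):

tag_id = tag_guessing('full-color')
if tag_id:
    doujinshi_list = tag_parser(tag_id, max_page=2)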
Example #27
File: parser.py Project: h007/nhentai
def search_parser(keyword, page):
    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
    result = []
    try:
        response = request('get', url=constant.SEARCH_URL, params={'query': keyword, 'page': page}).json()
        if 'result' not in response:
            raise Exception('No result in response')
    except requests.ConnectionError as e:
        logger.critical(e)
        logger.warning('If you are in China, please configure the proxy to bypass the GFW.')
        exit(1)

    for row in response['result']:
        title = row['title']['english']
        title = title[:85] + '..' if len(title) > 85 else title
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warning('No results found for keyword {}'.format(keyword))

    return result
Example #28
def search_parser(keyword, page):
    logger.debug('Searching doujinshis of keyword {0}'.format(keyword))
    result = []
    try:
        response = request('get', url=constant.SEARCH_URL, params={'q': keyword, 'page': page}).content
    except requests.ConnectionError as e:
        logger.critical(e)
        logger.warning('If you are in China, please configure the proxy to bypass the GFW.')
        exit(1)

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_search_result = html.find_all('div', attrs={'class': 'gallery'})
    for doujinshi in doujinshi_search_result:
        doujinshi_container = doujinshi.find('div', attrs={'class': 'caption'})
        title = doujinshi_container.text.strip()
        title = (title[:85] + '..') if len(title) > 85 else title
        id_ = re.search(r'/g/(\d+)/', doujinshi.a['href']).group(1)
        result.append({'id': id_, 'title': title})
    if not result:
        logger.warning('No results found for keyword {}'.format(keyword))

    return result
Example #29
def search_parser(keyword, sorting, page):
    logger.debug('Searching doujinshis using keywords {0}'.format(keyword))
    keyword = '+'.join(
        [i.strip().replace(' ', '-').lower() for i in keyword.split(',')])
    result = []
    i = 0
    while i < 5:
        try:
            url = request('get',
                          url=constant.SEARCH_URL,
                          params={
                              'query': keyword,
                              'page': page,
                              'sort': sorting
                          }).url
            response = request('get', url.replace('%2B', '+')).json()
        except Exception as e:
            i += 1
            if not i < 5:
                logger.critical(str(e))
                logger.warning(
                    'If you are in China, please configure the proxy to bypass the GFW.'
                )
                exit(1)
            continue
        break

    if 'result' not in response:
        raise Exception('No result in response')

    for row in response['result']:
        title = row['title']['english']
        title = title[:85] + '..' if len(title) > 85 else title
        result.append({'id': row['id'], 'title': title})

    if not result:
        logger.warning('No results for keywords {}'.format(keyword))

    return result
Example #30
    def download(self, queue, folder=''):
        if not isinstance(folder, text):
            folder = str(folder)

        if self.path:
            folder = os.path.join(self.path, folder)

        if not os.path.exists(folder):
            logger.warning('Path \'{0}\' does not exist, creating.'.format(folder))
            try:
                os.makedirs(folder)
            except EnvironmentError as e:
                logger.critical('{0}'.format(str(e)))
                exit(1)
        else:
            logger.warning('Path \'{0}\' already exists.'.format(folder))

        queue = [([url], {'folder': folder}) for url in queue]

        self.thread_pool = threadpool.ThreadPool(self.thread_count)
        requests_ = threadpool.makeRequests(self._download, queue, self._download_callback)
        [self.thread_pool.putRequest(req) for req in requests_]

        self.thread_pool.wait()
Example #31
File: parser.py Project: fakegit/nhentai
def doujinshi_parser(id_):
    if not isinstance(id_,
                      (int, )) and (isinstance(id_,
                                               (str, )) and not id_.isdigit()):
        raise Exception('Doujinshi id({0}) is not valid'.format(id_))

    id_ = int(id_)
    logger.log(15, 'Fetching doujinshi information of id {0}'.format(id_))
    doujinshi = dict()
    doujinshi['id'] = id_
    url = '{0}/{1}/'.format(constant.DETAIL_URL, id_)

    try:
        response = request('get', url)
        if response.status_code in (200, ):
            response = response.content
        elif response.status_code in (404, ):
            logger.error("Doujinshi with id {0} cannot be found".format(id_))
            return []
        else:
            logger.debug('Slow down and retry ({}) ...'.format(id_))
            time.sleep(1)
            return doujinshi_parser(str(id_))

    except Exception as e:
        logger.warning('Error: {}, ignored'.format(str(e)))
        return None

    html = BeautifulSoup(response, 'html.parser')
    doujinshi_info = html.find('div', attrs={'id': 'info'})

    title = doujinshi_info.find('h1').text
    pretty_name = doujinshi_info.find('h1').find('span',
                                                 attrs={
                                                     'class': 'pretty'
                                                 }).text
    subtitle = doujinshi_info.find('h2')

    doujinshi['name'] = title
    doujinshi['pretty_name'] = pretty_name
    doujinshi['subtitle'] = subtitle.text if subtitle else ''

    doujinshi_cover = html.find('div', attrs={'id': 'cover'})
    img_id = re.search(r'/galleries/([0-9]+)/cover\.(jpg|png|gif)$',
                       doujinshi_cover.a.img.attrs['data-src'])

    ext = []
    for i in html.find_all('div', attrs={'class': 'thumb-container'}):
        _, ext_name = os.path.basename(i.img.attrs['data-src']).rsplit('.', 1)
        ext.append(ext_name)

    if not img_id:
        logger.critical('Failed to get image id')
        exit(1)

    doujinshi['img_id'] = img_id.group(1)
    doujinshi['ext'] = ext

    pages = 0  # fallback so int() below never hits an unbound name
    for _ in doujinshi_info.find_all('div', class_='tag-container field-name'):
        if re.search('Pages:', _.text):
            pages = _.find('span', class_='name').string
    doujinshi['pages'] = int(pages)

    # gather tag information for the doujinshi
    information_fields = doujinshi_info.find_all('div',
                                                 attrs={'class': 'field-name'})
    needed_fields = [
        'Characters', 'Artists', 'Languages', 'Tags', 'Parodies', 'Groups',
        'Categories'
    ]
    for field in information_fields:
        field_name = field.contents[0].strip().strip(':')
        if field_name in needed_fields:
            data = [
                sub_field.find('span', attrs={
                    'class': 'name'
                }).contents[0].strip()
                for sub_field in field.find_all('a', attrs={'class': 'tag'})
            ]
            doujinshi[field_name.lower()] = ', '.join(data)

    time_field = doujinshi_info.find('time')
    if time_field.has_attr('datetime'):
        doujinshi['date'] = time_field['datetime']
    return doujinshi
Example #32
def cmd_parser():
    parser = OptionParser()
    parser.add_option('--download',
                      dest='is_download',
                      action='store_true',
                      help='download doujinshi or not')
    parser.add_option('--id',
                      type='int',
                      dest='id',
                      action='store',
                      help='doujinshi id of nhentai')
    parser.add_option('--ids',
                      type='str',
                      dest='ids',
                      action='store',
                      help='doujinshi id set, e.g. 1,2,3')
    parser.add_option('--search',
                      type='string',
                      dest='keyword',
                      action='store',
                      help='keyword searched')
    parser.add_option('--page',
                      type='int',
                      dest='page',
                      action='store',
                      default=1,
                      help='page number of search result')
    parser.add_option('--path',
                      type='string',
                      dest='saved_path',
                      action='store',
                      default='',
                      help='path which save the doujinshi')
    parser.add_option('--threads',
                      '-t',
                      type='int',
                      dest='threads',
                      action='store',
                      default=5,
                      help='thread count of download doujinshi')
    parser.add_option('--timeout',
                      type='int',
                      dest='timeout',
                      action='store',
                      default=30,
                      help='timeout of download doujinshi')
    parser.add_option('--proxy',
                      type='string',
                      dest='proxy',
                      action='store',
                      default='',
                      help='use proxy, example: http://127.0.0.1:1080')
    args, _ = parser.parse_args()

    if args.ids:
        _ = map(lambda id: id.strip(), args.ids.split(','))
        args.ids = set(map(int, filter(lambda id: id.isdigit(), _)))

    if args.is_download and not args.id and not args.ids and not args.keyword:
        logger.critical('Doujinshi id/ids is required for downloading')
        parser.print_help()
        exit(0)

    if args.id:
        args.ids = (args.id, ) if not args.ids else args.ids

    if not args.keyword and not args.ids:
        parser.print_help()
        exit(0)

    if args.threads <= 0:
        args.threads = 1
    elif args.threads > 10:
        logger.critical('Maximum number of used threads is 10')
        exit(0)

    if args.proxy:
        from urllib.parse import urlparse  # the original imported the Python 2-only urlparse module
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(
                proxy_url.scheme))
        else:
            constant.PROXY = {proxy_url.scheme: args.proxy}

    return args
Example #33
 def _download_callback(self, request, result):
     if not result:
         logger.critical('Too many errors occurred, quit.')
         exit(1)
     logger.log(15, '{0} downloaded successfully'.format(result))
Example #34
def cmd_parser():
    load_config()

    parser = OptionParser(
        '\n  nhentai --search [keyword] --download'
        '\n  NHENTAI=http://h.loli.club nhentai --id [ID ...]'
        '\n  nhentai --file [filename]'
        '\n\nEnvironment Variable:\n'
        '  NHENTAI                 nhentai mirror url')
    # operation options
    parser.add_option('--download',
                      '-D',
                      dest='is_download',
                      action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show',
                      '-S',
                      dest='is_show',
                      action='store_true',
                      help='just show the doujinshi information')

    # doujinshi options
    parser.add_option('--id',
                      type='string',
                      dest='id',
                      action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search',
                      '-s',
                      type='string',
                      dest='keyword',
                      action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--favorites',
                      '-F',
                      action='store_true',
                      dest='favorites',
                      help='list or download your favorites.')

    # page options
    parser.add_option('--page-all',
                      dest='page_all',
                      action='store_true',
                      default=False,
                      help='all search results')
    parser.add_option('--page',
                      '--page-range',
                      type='string',
                      dest='page',
                      action='store',
                      default='',
                      help='page number of search results. e.g. 1,2-5,14')
    parser.add_option(
        '--sorting',
        dest='sorting',
        action='store',
        default='recent',
        help='sorting of doujinshi (recent / popular / popular-[today|week])',
        choices=['recent', 'popular', 'popular-today', 'popular-week'])

    # download options
    parser.add_option('--output',
                      '-o',
                      type='string',
                      dest='output_dir',
                      action='store',
                      default='./',
                      help='output dir')
    parser.add_option('--threads',
                      '-t',
                      type='int',
                      dest='threads',
                      action='store',
                      default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout',
                      '-T',
                      type='int',
                      dest='timeout',
                      action='store',
                      default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--delay',
                      '-d',
                      type='int',
                      dest='delay',
                      action='store',
                      default=0,
                      help='slow down between downloading every doujinshi')
    parser.add_option(
        '--proxy',
        type='string',
        dest='proxy',
        action='store',
        default='',
        help='store a proxy, for example: --proxy \'http://127.0.0.1:1080\'')
    parser.add_option('--file',
                      '-f',
                      type='string',
                      dest='file',
                      action='store',
                      help='read gallery IDs from file.')
    parser.add_option('--format',
                      type='string',
                      dest='name_format',
                      action='store',
                      help='format the saved folder name',
                      default='[%i][%a][%t]')

    # generate options
    parser.add_option('--html',
                      dest='html_viewer',
                      action='store_true',
                      help='generate a html viewer at current directory')
    parser.add_option('--no-html',
                      dest='is_nohtml',
                      action='store_true',
                      help='don\'t generate HTML after downloading')
    parser.add_option(
        '--gen-main',
        dest='main_viewer',
        action='store_true',
        help='generate a main viewer containing all the doujinshi in the folder')
    parser.add_option('--cbz',
                      '-C',
                      dest='is_cbz',
                      action='store_true',
                      help='generate Comic Book CBZ File')
    parser.add_option('--pdf',
                      '-P',
                      dest='is_pdf',
                      action='store_true',
                      help='generate PDF file')
    parser.add_option(
        '--rm-origin-dir',
        dest='rm_origin_dir',
        action='store_true',
        default=False,
        help='remove the downloaded doujinshi directory after generating the CBZ or PDF file.')

    # nhentai options
    parser.add_option('--cookie',
                      type='str',
                      dest='cookie',
                      action='store',
                      help='set cookie of nhentai to bypass Google recaptcha')
    parser.add_option('--language',
                      type='str',
                      dest='language',
                      action='store',
                      help='set default language to parse doujinshis')
    parser.add_option('--clean-language',
                      dest='clean_language',
                      action='store_true',
                      default=False,
                      help='set DEFAULT as language to parse doujinshis')
    parser.add_option(
        '--save-download-history',
        dest='is_save_download_history',
        action='store_true',
        default=False,
        help=
        'save downloaded doujinshis, which will be skipped if you re-download them'
    )
    parser.add_option('--clean-download-history',
                      action='store_true',
                      default=False,
                      dest='clean_download_history',
                      help='clean download history')

    try:
        sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
        print()
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.main_viewer and not args.id and not args.keyword and not args.favorites:
        generate_main_html()
        exit(0)

    if args.clean_download_history:
        with DB() as db:
            db.clean_all()

        logger.info('Download history cleaned.')
        exit(0)

    # --- set config ---
    if args.cookie is not None:
        constant.CONFIG['cookie'] = args.cookie
        logger.info('Cookie saved.')
        write_config()
        exit(0)

    if args.language is not None:
        constant.CONFIG['language'] = args.language
        logger.info('Default language now set to \'{0}\''.format(
            args.language))
        write_config()
        exit(0)
        # TODO: search without language

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(
                proxy_url.scheme))
            exit(0)
        else:
            constant.CONFIG['proxy'] = {
                'http': args.proxy,
                'https': args.proxy,
            }
            logger.info('Proxy now set to \'{0}\'.'.format(args.proxy))
            write_config()
            exit(0)
    # --- end set config ---

    if args.favorites:
        if not constant.CONFIG['cookie']:
            logger.warning(
                'Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.'
            )
            exit(1)

    if args.id:
        _ = [i.strip() for i in args.id.split(',')]
        args.id = set(int(i) for i in _ if i.isdigit())

    if args.file:
        with open(args.file, 'r') as f:
            _ = [i.strip() for i in f.readlines()]
            args.id = set(int(i) for i in _ if i.isdigit())

    if (args.is_download or args.is_show
        ) and not args.id and not args.keyword and not args.favorites:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.favorites:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1

    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    return args
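
cmd_parser here leans on config helpers (load_config, write_config) that are not shown. A rough sketch of the pair; the file location and JSON format are assumptions:

import json
import os

from nhentai import constant

_CONFIG_PATH = os.path.join(os.path.expanduser('~'), '.nhentai', 'config.json')

def load_config():
    # Merge an on-disk JSON config into the in-memory defaults, if present.
    if os.path.exists(_CONFIG_PATH):
        with open(_CONFIG_PATH) as f:
            constant.CONFIG.update(json.load(f))

def write_config():
    # Persist the in-memory config under the user's home directory.
    os.makedirs(os.path.dirname(_CONFIG_PATH), exist_ok=True)
    with open(_CONFIG_PATH, 'w') as f:
        f.write(json.dumps(constant.CONFIG))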
Example #35
def cmd_parser():
    parser = OptionParser(
        '\n  nhentai --search [keyword] --download'
        '\n  NHENTAI=http://h.loli.club nhentai --id [ID ...]'
        '\n\nEnvironment Variable:\n'
        '  NHENTAI                 nhentai mirror url')
    parser.add_option('--download',
                      dest='is_download',
                      action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show-info',
                      dest='is_show',
                      action='store_true',
                      help='just show the doujinshi information')
    parser.add_option('--id',
                      type='string',
                      dest='id',
                      action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search',
                      type='string',
                      dest='keyword',
                      action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--page',
                      type='int',
                      dest='page',
                      action='store',
                      default=1,
                      help='page number of search results')
    parser.add_option('--tag',
                      type='string',
                      dest='tag',
                      action='store',
                      help='download doujinshi by tag')
    parser.add_option(
        '--max-page',
        type='int',
        dest='max_page',
        action='store',
        default=1,
        help='the maximum page when recursively downloading tagged doujinshi')
    parser.add_option('--output',
                      type='string',
                      dest='output_dir',
                      action='store',
                      default='',
                      help='output dir')
    parser.add_option('--threads',
                      '-t',
                      type='int',
                      dest='threads',
                      action='store',
                      default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout',
                      type='int',
                      dest='timeout',
                      action='store',
                      default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--proxy',
                      type='string',
                      dest='proxy',
                      action='store',
                      default='',
                      help='uses a proxy, for example: http://127.0.0.1:1080')
    parser.add_option('--html',
                      dest='html_viewer',
                      action='store_true',
                      help='generate a html viewer at current directory')

    parser.add_option('--login',
                      '-l',
                      type='str',
                      dest='login',
                      action='store',
                      help='username:password pair of nhentai account')

    parser.add_option('--nohtml',
                      dest='is_nohtml',
                      action='store_true',
                      help='Don\'t generate HTML')

    parser.add_option('--cbz',
                      dest='is_cbz',
                      action='store_true',
                      help='Generate Comic Book CBZ File')
    parser.add_option(
        '--rm-origin-dir',
        dest='rm_origin_dir',
        action='store_true',
        default=False,
        help='remove the downloaded doujinshi directory after generating the CBZ file.')

    try:
        sys.argv = list(
            map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.login:
        try:
            _, _ = args.login.split(':', 1)
        except ValueError:
            logger.error('Invalid `username:password` pair.')
            exit(1)

        if not args.is_download:
            logger.warning('You did not specify the `--download` option!')

    if args.id:
        _ = map(lambda id: id.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))

    if (args.is_download or args.is_show) and not args.id and not args.keyword and \
            not args.login and not args.tag:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.login and not args.tag:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1

    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(
                proxy_url.scheme))
        else:
            constant.PROXY = {'http': args.proxy, 'https': args.proxy}

    return args
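
For reference, a standalone demonstration (illustration only, not part of the project) of the `username:password` validation used above:

# split(':', 1) keeps any ':' inside the password intact and raises
# ValueError on unpacking when the separator is missing.
print('alice:s3cret:!'.split(':', 1))  # ['alice', 's3cret:!']
try:
    username, password = 'no-separator'.split(':', 1)
except ValueError:
    print('Invalid `username:password` pair.')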
Example #36
0
def cmd_parser():
    parser = OptionParser(
        '\n  nhentai --search [keyword] --download'
        '\n  NHENTAI=http://h.loli.club nhentai --id [ID ...]'
        '\n  nhentai --file [filename]'
        '\n\nEnvironment Variable:\n'
        '  NHENTAI                 nhentai mirror url')
    # operation options
    parser.add_option('--download',
                      '-D',
                      dest='is_download',
                      action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show',
                      '-S',
                      dest='is_show',
                      action='store_true',
                      help='just show the doujinshi information')

    # doujinshi options
    parser.add_option('--id',
                      type='string',
                      dest='id',
                      action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search',
                      '-s',
                      type='string',
                      dest='keyword',
                      action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--tag',
                      type='string',
                      dest='tag',
                      action='store',
                      help='download doujinshi by tag')
    parser.add_option('--artist',
                      type='string',
                      dest='artist',
                      action='store',
                      help='download doujinshi by artist')
    parser.add_option('--character',
                      type='string',
                      dest='character',
                      action='store',
                      help='download doujinshi by character')
    parser.add_option('--parody',
                      type='string',
                      dest='parody',
                      action='store',
                      help='download doujinshi by parody')
    parser.add_option('--group',
                      type='string',
                      dest='group',
                      action='store',
                      help='download doujinshi by group')
    parser.add_option('--language',
                      type='string',
                      dest='language',
                      action='store',
                      help='download doujinshi by language')
    parser.add_option('--favorites',
                      '-F',
                      action='store_true',
                      dest='favorites',
                      help='list or download your favorites.')

    # page options
    parser.add_option('--page',
                      type='int',
                      dest='page',
                      action='store',
                      default=1,
                      help='page number of search results')
    parser.add_option(
        '--max-page',
        type='int',
        dest='max_page',
        action='store',
        default=1,
        help='The maximum page when recursively downloading tagged doujinshi')
    parser.add_option('--page-range',
                      type='string',
                      dest='page_range',
                      action='store',
                      help='page range of favorites, e.g. 1,2-5,14')
    parser.add_option('--sorting',
                      dest='sorting',
                      action='store',
                      default='date',
                      help='sorting of doujinshi (date / popular)',
                      choices=['date', 'popular'])
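    # optparse infers type='choice' because `choices` is supplied above,
    # so values outside ['date', 'popular'] are rejected at parse time.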

    # download options
    parser.add_option('--output',
                      '-o',
                      type='string',
                      dest='output_dir',
                      action='store',
                      default='',
                      help='output dir')
    parser.add_option('--threads',
                      '-t',
                      type='int',
                      dest='threads',
                      action='store',
                      default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout',
                      '-T',
                      type='int',
                      dest='timeout',
                      action='store',
                      default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--delay',
                      '-d',
                      type='int',
                      dest='delay',
                      action='store',
                      default=0,
                      help='slow down between downloading every doujinshi')
    parser.add_option(
        '--proxy',
        '-p',
        type='string',
        dest='proxy',
        action='store',
        default='',
        help='store a proxy, for example: -p \'http://127.0.0.1:1080\'')
    parser.add_option('--file',
                      '-f',
                      type='string',
                      dest='file',
                      action='store',
                      help='read gallery IDs from file.')
    parser.add_option('--format',
                      type='string',
                      dest='name_format',
                      action='store',
                      help='format the saved folder name',
                      default='[%i][%a][%t]')

    # generate options
    parser.add_option('--html',
                      dest='html_viewer',
                      action='store_true',
                      help='generate a html viewer at current directory')
    parser.add_option('--no-html',
                      dest='is_nohtml',
                      action='store_true',
                      help='don\'t generate HTML after downloading')
    parser.add_option(
        '--gen-main',
        dest='main_viewer',
        action='store_true',
        help='generate a main viewer containing all the doujinshi in the folder')
    parser.add_option('--cbz',
                      '-C',
                      dest='is_cbz',
                      action='store_true',
                      help='generate Comic Book CBZ File')
    parser.add_option(
        '--comic-info',
        dest='write_comic_info',
        action='store_true',
        help='when generating Comic Book CBZ File, also write ComicInfo.xml')
    parser.add_option(
        '--rm-origin-dir',
        dest='rm_origin_dir',
        action='store_true',
        default=False,
        help='remove the downloaded doujinshi directory after generating the CBZ file.')

    # nhentai options
    parser.add_option('--cookie',
                      type='str',
                      dest='cookie',
                      action='store',
                      help='set cookie of nhentai to bypass Google recaptcha')
    parser.add_option(
        '--save-download-history',
        dest='is_save_download_history',
        action='store_true',
        default=False,
        help='save downloaded doujinshis, which will be skipped if you re-download them')
    parser.add_option('--clean-download-history',
                      action='store_true',
                      default=False,
                      dest='clean_download_history',
                      help='clean download history')

    try:
        sys.argv = [unicode(i.decode(sys.stdin.encoding)) for i in sys.argv]
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.main_viewer and not args.id and not args.keyword and \
            not args.tag and not args.artist and not args.character and \
            not args.parody and not args.group and not args.language and not args.favorites:
        generate_main_html()
        exit(0)

    if args.clean_download_history:
        with DB() as db:
            db.clean_all()

        logger.info('Download history cleaned.')
        exit(0)

    if os.path.exists(constant.NHENTAI_COOKIE):
        with open(constant.NHENTAI_COOKIE, 'r') as f:
            constant.COOKIE = f.read()

    if args.cookie:
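        # Persist the cookie under NHENTAI_HOME and exit; subsequent runs
        # load it from the NHENTAI_COOKIE file read above.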
        try:
            if not os.path.exists(constant.NHENTAI_HOME):
                os.mkdir(constant.NHENTAI_HOME)

            with open(constant.NHENTAI_COOKIE, 'w') as f:
                f.write(args.cookie)
        except Exception as e:
            logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e)))
            exit(1)

        logger.info('Cookie saved.')
        exit(0)

    if os.path.exists(constant.NHENTAI_PROXY):
        with open(constant.NHENTAI_PROXY, 'r') as f:
            link = f.read()
            constant.PROXY = {'http': link, 'https': link}

    if args.proxy:
        try:
            if not os.path.exists(constant.NHENTAI_HOME):
                os.mkdir(constant.NHENTAI_HOME)

            proxy_url = urlparse(args.proxy)
            if proxy_url.scheme not in ('http', 'https'):
                logger.error(
                    'Invalid protocol \'{0}\' of proxy, ignored'.format(
                        proxy_url.scheme))
            else:
                with open(constant.NHENTAI_PROXY, 'w') as f:
                    f.write(args.proxy)

        except Exception as e:
            logger.error('Cannot create NHENTAI_HOME: {}'.format(str(e)))
            exit(1)

        logger.info('Proxy \'{0}\' saved.'.format(args.proxy))
        exit(0)

    if args.favorites:
        if not constant.COOKIE:
            logger.warning(
                'Cookie has not been set, please use `nhentai --cookie \'COOKIE\'` to set it.'
            )
            exit(1)

    if args.id:
        _ = [i.strip() for i in args.id.split(',')]
        args.id = set(int(i) for i in _ if i.isdigit())

    if args.file:
        with open(args.file, 'r') as f:
            _ = [i.strip() for i in f.readlines()]
            args.id = set(int(i) for i in _ if i.isdigit())

    if (args.is_download or args.is_show) and not args.id and not args.keyword and \
            not args.tag and not args.artist and not args.character and \
            not args.parody and not args.group and not args.language and not args.favorites:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.tag and not args.artist and \
            not args.character and not args.parody and not args.group and not args.language and not args.favorites:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1

    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    return args
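
None of these snippets show how a `--page-range` value such as `1,2-5,14` is expanded; the helper below is a plausible standalone sketch (an assumption for illustration, not the project's actual parser):

def expand_page_range(spec):
    # Hypothetical helper: expand '1,2-5,14' into [1, 2, 3, 4, 5, 14].
    pages = set()
    for part in spec.split(','):
        if '-' in part:
            start, end = part.split('-', 1)
            pages.update(range(int(start), int(end) + 1))
        else:
            pages.add(int(part))
    return sorted(pages)

print(expand_page_range('1,2-5,14'))  # [1, 2, 3, 4, 5, 14]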
Example #37
0
def cmd_parser():
    parser = OptionParser('\n  nhentai --search [keyword] --download'
                          '\n  NHENTAI=http://h.loli.club nhentai --id [ID ...]'
                          '\n\nEnvironment Variable:\n'
                          '  NHENTAI                 nhentai mirror url')
    parser.add_option('--download', dest='is_download', action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show-info', dest='is_show', action='store_true', help='just show the doujinshi information')
    parser.add_option('--id', type='string', dest='id', action='store', help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search', type='string', dest='keyword', action='store', help='search doujinshi by keyword')
    parser.add_option('--page', type='int', dest='page', action='store', default=1,
                      help='page number of search result')
    parser.add_option('--tags', type='string', dest='tags', action='store', help='download doujinshi by tags')
    parser.add_option('--output', type='string', dest='output_dir', action='store', default='',
                      help='output dir')
    parser.add_option('--threads', '-t', type='int', dest='threads', action='store', default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout', type='int', dest='timeout', action='store', default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--proxy', type='string', dest='proxy', action='store', default='',
                      help='use a proxy, for example: http://127.0.0.1:1080')
    parser.add_option('--html', dest='html_viewer', action='store_true',
                      help='generate a html viewer at current directory')

    parser.add_option('--login', '-l', type='str', dest='login', action='store',
                      help='username:password pair of nhentai account')

    try:
        sys.argv = list(map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.html_viewer:
        generate_html()
        exit(0)

    if args.login:
        try:
            _, _ = args.login.split(':', 1)
        except ValueError:
            logger.error('Invalid `username:password` pair.')
            exit(1)

        if not args.is_download:
            logger.warning('You did not specify the `--download` option.')

    if args.tags:
        logger.warning('`--tags` is under construction')
        exit(1)

    if args.id:
        _ = map(lambda id: id.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id_: id_.isdigit(), _)))

    if (args.is_download or args.is_show) and not args.id and not args.keyword and not args.login:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id and not args.login:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1

    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(proxy_url.scheme))
        else:
            constant.PROXY = {'http': args.proxy, 'https': args.proxy}

    return args
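
A standalone illustration (not from the project) of the `--id` parsing pattern shared by these examples; non-numeric tokens are dropped silently and duplicates collapse into the set:

raw = '1, 2,3,3,abc'
tokens = map(lambda s: s.strip(), raw.split(','))
ids = set(map(int, filter(lambda t: t.isdigit(), tokens)))
print(ids)  # {1, 2, 3}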
Example #38
0
def cmd_parser():
    parser = OptionParser(
        '\n  nhentai --search [keyword] --download'
        '\n  NHENTAI=http://h.loli.club nhentai --id [ID ...]'
        '\n\nEnvironment Variable:\n'
        '  NHENTAI                 nhentai mirror url')
    parser.add_option('--download',
                      dest='is_download',
                      action='store_true',
                      help='download doujinshi (for search results)')
    parser.add_option('--show-info',
                      dest='is_show',
                      action='store_true',
                      help='just show the doujinshi information')
    parser.add_option('--id',
                      type='string',
                      dest='id',
                      action='store',
                      help='doujinshi ids set, e.g. 1,2,3')
    parser.add_option('--search',
                      type='string',
                      dest='keyword',
                      action='store',
                      help='search doujinshi by keyword')
    parser.add_option('--page',
                      type='int',
                      dest='page',
                      action='store',
                      default=1,
                      help='page number of search result')
    parser.add_option('--tags',
                      type='string',
                      dest='tags',
                      action='store',
                      help='download doujinshi by tags')
    parser.add_option('--output',
                      type='string',
                      dest='output_dir',
                      action='store',
                      default='',
                      help='output dir')
    parser.add_option('--threads',
                      '-t',
                      type='int',
                      dest='threads',
                      action='store',
                      default=5,
                      help='thread count for downloading doujinshi')
    parser.add_option('--timeout',
                      type='int',
                      dest='timeout',
                      action='store',
                      default=30,
                      help='timeout for downloading doujinshi')
    parser.add_option('--proxy',
                      type='string',
                      dest='proxy',
                      action='store',
                      default='',
                      help='use a proxy, for example: http://127.0.0.1:1080')

    try:
        sys.argv = list(
            map(lambda x: unicode(x.decode(sys.stdin.encoding)), sys.argv))
    except (NameError, TypeError):
        pass
    except UnicodeDecodeError:
        exit(0)

    args, _ = parser.parse_args(sys.argv[1:])

    if args.tags:
        logger.warning('`--tags` is under construction')
        exit(0)

    if args.id:
        _ = map(lambda id: id.strip(), args.id.split(','))
        args.id = set(map(int, filter(lambda id: id.isdigit(), _)))

    if (args.is_download or args.is_show) and not args.id and not args.keyword:
        logger.critical('Doujinshi id(s) are required for downloading')
        parser.print_help()
        exit(1)

    if not args.keyword and not args.id:
        parser.print_help()
        exit(1)

    if args.threads <= 0:
        args.threads = 1

    elif args.threads > 15:
        logger.critical('Maximum number of used threads is 15')
        exit(1)

    if args.proxy:
        proxy_url = urlparse(args.proxy)
        if proxy_url.scheme not in ('http', 'https'):
            logger.error('Invalid protocol \'{0}\' of proxy, ignored'.format(
                proxy_url.scheme))
        else:
            constant.PROXY = {proxy_url.scheme: args.proxy}

    return args
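
Note that this version registers the proxy only under its own scheme, while the other examples map both 'http' and 'https' to the same URL. A hedged sketch of the assumed downstream use (the project's request() wrapper is not shown in these snippets):

import requests

# requests selects a proxy by the scheme of the target URL, so a mapping
# with only an 'http' key leaves https traffic un-proxied.
proxies = {'http': 'http://127.0.0.1:1080'}
# Call commented out because it needs a live proxy at 127.0.0.1:1080:
# requests.get('http://example.com/', proxies=proxies, timeout=30)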