示例#1
0
    def getPixivPage(self, url, referer="https://www.pixiv.net", returnParsed=True, enable_cache=True):
        ''' get page from pixiv and return as parsed BeautifulSoup object or page text.

            url          -- page address, normalised with self.fixUrl() first.
            referer      -- value sent in the Referer request header.
            returnParsed -- True: return a BeautifulSoup tree (html5lib parser),
                            False: return the decoded page text.
            enable_cache -- True: store a freshly downloaded page in the cache.
                            A cached copy is used whenever one exists,
                            regardless of this flag.

            throw PixivException as server error
        '''
        url = self.fixUrl(url)
        req = mechanize.Request(url)
        req.add_header('Referer', referer)

        read_page = self._get_from_cache(url)
        if read_page is None:
            try:
                temp = self.open_with_retry(req)
                try:
                    read_page = temp.read().decode('utf8')
                finally:
                    # always release the response, even when read/decode fails
                    temp.close()
                if enable_cache:
                    self._put_to_cache(url, read_page)
            except urllib.error.HTTPError as ex:
                if ex.code in [403, 404, 503]:
                    raise PixivException("Failed to get page: {0} => {1}".format(
                        url, ex), errorCode=PixivException.SERVER_ERROR)
                PixivHelper.print_and_log(
                    'error', 'Error at getPixivPage(): {0}'.format(str(sys.exc_info())))
                raise PixivException("Failed to get page: {0}".format(
                    url), errorCode=PixivException.SERVER_ERROR)

        if returnParsed:
            return BeautifulSoup(read_page, features="html5lib")
        return read_page
示例#2
0
    def __init__(self,
                 mid=0,
                 page=None,
                 fromImage=False,
                 offset=None,
                 limit=None):
        ''' Store the paging arguments and, when a page is given, parse it.

            mid       -- artist id, kept as self.artistId.
            page      -- raw page text; nothing is parsed when it is None.
            fromImage -- True when the page comes from an image page: the
                         image list is not parsed and the object is flagged
                         as last page / having images.
            offset, limit -- stored unchanged on the instance.

            throw PixivException when the decoded payload reports an error or
            has no body.
        '''
        self.offset, self.limit, self.artistId = offset, limit, mid

        if page is None:
            return

        payload = parseJs(page)

        if fromImage:
            self.isLastPage = True
            self.haveImages = True
        else:
            # detect if image count != 0
            payload = demjson.decode(page)
            if payload["error"]:
                raise PixivException(
                    payload["message"],
                    errorCode=PixivException.OTHER_MEMBER_ERROR,
                    htmlPage=page)
            body = payload["body"]
            if body is None:
                raise PixivException(
                    "Missing body content, possible artist id doesn't exists.",
                    errorCode=PixivException.USER_ID_NOT_EXISTS,
                    htmlPage=page)
            self.ParseImages(body)

        # parse artist info
        self.ParseInfo(payload, fromImage)
示例#3
0
    def getEmbedData(self, embedData, jsPost):
        ''' Format the embedded content of a post using content_provider.json.

            embedData -- mapping describing the embed; "serviceProvider"
                         selects the entry of "embedConfig" to use.
            jsPost    -- the post data, attached to raised exceptions.

            Returns "" when the provider is flagged "ignore", otherwise the
            provider "format" string filled with the first configured key
            found in embedData.

            throw PixivException when content_provider.json is missing, the
            provider is not configured, or no content id could be found.
        '''
        if not os.path.exists("content_provider.json"):
            raise PixivException("Missing content_provider.json, please redownload application!",
                                  errorCode=PixivException.MISSING_CONFIG,
                                  htmlPage=None)

        cfg = demjson.decode_file("content_provider.json")
        embed_cfg = cfg["embedConfig"]
        current_provider = embedData["serviceProvider"]

        if current_provider not in embed_cfg:
            raise PixivException("Unsupported embed provider = {0} for post = {1}, please update content_provider.json.".format(embedData["serviceProvider"], self.imageId),
                                 errorCode=9999,
                                 htmlPage=jsPost)

        provider_cfg = embed_cfg[current_provider]
        if provider_cfg["ignore"]:
            return ""

        content_id = None
        for key in provider_cfg["keys"]:
            # `in` works on Python 2 and 3; dict.has_key() is Python 2 only
            if key in embedData:
                content_id = embedData[key]
                break

        if content_id is not None and len(content_id) > 0:
            return provider_cfg["format"].format(content_id)

        raise PixivException("Empty content_id for embed provider = {0} for post = {1}, please update content_provider.json.".format(embedData["serviceProvider"], self.imageId),
                              errorCode=9999,
                              htmlPage=jsPost)
示例#4
0
    def getMemberInfoWhitecube(self, member_id, artist, bookmark=False):
        ''' get artist information using Ajax and AppAPI

            member_id -- id used to build both request urls.
            artist    -- object updated through ParseInfo() and
                         ParseBackground(); it is also the return value.
            bookmark  -- forwarded to artist.ParseInfo().

            Both responses are looked up in the cache first and stored there
            after a download.

            throw PixivException built from the error payload when an HTTP
            error occurs: NOT_LOGGED_IN for 401, USER_ID_SUSPENDED for 403,
            OTHER_MEMBER_ERROR otherwise.
        '''
        try:
            url = 'https://app-api.pixiv.net/v1/user/detail?user_id={0}'.format(
                member_id)
            info = self.get_from_cache(url)
            if info is None:
                PixivHelper.GetLogger().debug("Getting member information: %s",
                                              member_id)
                infoStr = self.open(url).read()
                info = json.loads(infoStr)
                self.put_to_cache(url, info)

            artist.ParseInfo(info, False, bookmark=bookmark)

            # will throw HTTPError if user is suspended/not logged in.
            url_ajax = 'https://www.pixiv.net/ajax/user/{0}'.format(member_id)
            info_ajax = self.get_from_cache(url_ajax)
            if info_ajax is None:
                info_ajax_str = self.open(url_ajax).read()
                info_ajax = json.loads(info_ajax_str)
                self.put_to_cache(url_ajax, info_ajax)
            # 2nd pass to get the background
            artist.ParseBackground(info_ajax)

            return artist
        except urllib2.HTTPError as error:
            errorCode = error.getcode()
            errorMessage = error.get_data()
            PixivHelper.GetLogger().error("Error data: \r\n %s", errorMessage)
            payload = demjson.decode(errorMessage)
            # Fallback text so msg is always bound, even when the payload has
            # neither a "message" nor an "error" entry.
            msg = "Failed to get member information for: {0}".format(member_id)
            # Issue #432
            if "message" in payload:
                msg = payload["message"]
            elif "error" in payload and payload["error"] is not None:
                error_info = payload["error"]
                msg = ",".join([error_info["user_message"],
                                error_info["message"],
                                error_info["reason"]])

            if errorCode == 401:
                pixiv_code = PixivException.NOT_LOGGED_IN
            elif errorCode == 403:
                pixiv_code = PixivException.USER_ID_SUSPENDED
            else:
                pixiv_code = PixivException.OTHER_MEMBER_ERROR
            raise PixivException(msg,
                                 errorCode=pixiv_code,
                                 htmlPage=errorMessage)
示例#5
0
    def get_embed_url_data(self, embedData, jsPost) -> str:
        ''' Format an url embed of a post using content_provider.json.

            embedData -- mapping describing the embed; "type" selects the
                         entry of "urlEmbedConfig" to use.
            jsPost    -- the post data, attached to the unsupported-provider
                         exception.

            The config file is looked up next to sys.executable first, then
            in the current directory. Configured keys may be dotted paths
            ("a.b.c") into embedData. Every url matched in the
            "get_link_keys" values is passed to self.try_add() together with
            self.descriptionUrlList.

            Returns "" when the provider is flagged "ignore", otherwise the
            provider "format" string filled with the values of "keys".

            throw PixivException when the config file is missing or the
            provider is not configured.
        '''
        # Issue #1133
        content_provider_path = os.path.abspath(
            os.path.dirname(sys.executable) + os.sep + "content_provider.json")
        if not os.path.exists(content_provider_path):
            content_provider_path = os.path.abspath("./content_provider.json")
        if not os.path.exists(content_provider_path):
            raise PixivException(
                f"Missing content_provider.json, please get it from https://github.com/Nandaka/PixivUtil2/blob/master/content_provider.json! Expected location => {content_provider_path}",
                errorCode=PixivException.MISSING_CONFIG,
                htmlPage=None)

        cfg = demjson3.decode_file(content_provider_path)
        embed_cfg = cfg["urlEmbedConfig"]
        current_provider = embedData["type"]

        if current_provider not in embed_cfg:
            msg = "Unsupported url embed provider = {0} for post = {1}, please update content_provider.json."
            # Report the provider that was actually looked up; reading
            # embedData["serviceProvider"] here could raise KeyError and hide
            # this error.
            raise PixivException(msg.format(current_provider,
                                            self.imageId),
                                 errorCode=9999,
                                 htmlPage=jsPost)

        provider_cfg = embed_cfg[current_provider]
        if provider_cfg["ignore"]:
            return ""

        def _lookup(dotted_key):
            # walk embedData following a "a.b.c" style key
            node = embedData
            for part in dotted_key.split("."):
                node = node[part]
            return node

        # get urls from given keys
        for key in provider_cfg["get_link_keys"]:
            for link in _url_pattern.finditer(_lookup(key)):
                self.try_add(link.group(), self.descriptionUrlList)

        # get all the keys to list
        keys = [_lookup(key) for key in provider_cfg["keys"]]
        template = provider_cfg["format"]
        return template.format(*keys)
示例#6
0
    def parsePosts(self, page):
        ''' Decode a Fanbox listing page and build the FanboxPost objects.

            Sets self.artistName when the body carries a "creator" entry,
            always stores self.nextUrl and sets self.hasNextPage when that
            url is not empty.

            Returns the list of posts, or None when the body is None.

            throw PixivException when the decoded page reports an error.
        '''
        js = demjson.decode(page)

        if "error" in js and js["error"]:
            raise PixivException(
                "Error when requesting Fanbox artist: {0}".format(self.artistId), 9999, page)

        js_body = js["body"]
        if js_body is None:
            return None

        if "creator" in js_body:
            self.artistName = js_body["creator"]["user"]["name"]

        # "post" present => new api; otherwise the body itself is the root
        # (https://www.pixiv.net/ajax/fanbox/post?postId={0} or old api)
        post_root = js_body["post"] if "post" in js_body else js_body

        posts = []
        for jsPost in post_root["items"]:
            posts.append(
                FanboxPost(int(jsPost["id"]), self, jsPost, tzInfo=self._tzInfo))
            # sanity check
            assert (self.artistId == int(jsPost["user"]["userId"])), "Different user id from constructor!"

        self.nextUrl = post_root["nextUrl"]
        if self.nextUrl is not None and len(self.nextUrl) > 0:
            self.hasNextPage = True

        return posts
    def open_with_retry(
            self,
            url,
            data=None,
            timeout=mechanize._sockettimeout._GLOBAL_DEFAULT_TIMEOUT,
            retry=0):
        ''' Return the result of self.open(), retrying on non-HTTP failures.

            url, data, timeout -- forwarded to self.open().
            retry -- number of retries; 0 means "use self._config.retry"
                     when a configuration is available.

            HTTP errors are re-raised untouched. KeyboardInterrupt and
            SystemExit are not retried and propagate to the caller.

            throw PixivException as server error once the retries are used up.
        '''
        retry_count = 0
        if retry == 0 and self._config is not None:
            retry = self._config.retry

        while True:
            try:
                return self.open(url, data, timeout)
            except urllib.error.HTTPError:
                raise
            except Exception:
                if retry_count < retry:
                    # without a configuration there is no wait time to read:
                    # retry immediately instead of failing on None
                    retry_wait = self._config.retryWait if self._config is not None else 0
                    for t in range(1, retry_wait):
                        print(t, end=' ')
                        time.sleep(1)
                    print('')
                    retry_count = retry_count + 1
                else:
                    PixivHelper.print_and_log(
                        'error', 'Error at open_with_retry(): {0}'.format(
                            str(sys.exc_info())))
                    raise PixivException(
                        "Failed to get page: {0}, please check your internet connection/firewall/antivirus."
                        .format(url),
                        errorCode=PixivException.SERVER_ERROR)
示例#8
0
    def open_with_retry(
            self,
            url,
            data=None,
            timeout=mechanize._sockettimeout._GLOBAL_DEFAULT_TIMEOUT,
            retry=0):
        ''' Return the result of self.open(), retrying on non-HTTP failures.

            url, data, timeout -- forwarded to self.open().
            retry -- number of retries; 0 means "use self._config.retry"
                     when a configuration is available.

            HTTP errors are re-raised untouched.

            throw PixivException as server error once the retries are used
            up; the message carries the text of the last exception.
        '''
        retry_count = 0
        if retry == 0 and self._config is not None:
            retry = self._config.retry

        while True:
            try:
                return self.open(url, data, timeout)
            except urllib2.HTTPError:
                raise
            except Exception as ex:
                if retry_count < retry:
                    # without a configuration there is no wait time to read:
                    # retry immediately instead of failing on None
                    retry_wait = self._config.retryWait if self._config is not None else 0
                    for t in range(1, retry_wait):
                        print(t, end=' ')
                        time.sleep(1)
                    print('')
                    retry_count = retry_count + 1
                else:
                    # str(ex) instead of the deprecated ex.message, which is
                    # empty for most network errors
                    raise PixivException(
                        "Failed to get page: {0}, please check your internet connection/firewall/antivirus."
                        .format(str(ex)),
                        errorCode=PixivException.SERVER_ERROR)
    def getPixivPage(self, url, referer="https://www.pixiv.net", returnParsed=True):
        ''' get page from pixiv and return as parsed BeautifulSoup object or response object.

            url          -- page address, normalised with self.fixUrl() first.
            referer      -- value sent in the Referer request header.
            returnParsed -- True: return the page parsed by BeautifulSoup,
                            False: return the response object itself.

            An HTTP 403/404/503 answer is not an error here: its body is
            returned parsed by BeautifulSoup. Other failures are retried up
            to self._config.retry times.

            throw PixivException as server error
        '''
        url = self.fixUrl(url)
        retry_count = 0
        while True:
            req = urllib2.Request(url)
            req.add_header('Referer', referer)
            try:
                page = self.open_with_retry(req)
                if returnParsed:
                    return BeautifulSoup(page.read())
                return page
            except urllib2.HTTPError as ex:
                if ex.code in [403, 404, 503]:
                    return BeautifulSoup(ex.read())
                # Any other status used to fall out of this handler and
                # restart the loop with no wait and no counter: an endless
                # request loop. Report it instead.
                PixivHelper.print_and_log('error', 'Error at getPixivPage(): {0}'.format(str(sys.exc_info())))
                raise PixivException("Failed to get page: {0}".format(url), errorCode=PixivException.SERVER_ERROR)
            except Exception:
                # Exception, not BaseException: Ctrl+C must not be retried
                if retry_count < self._config.retry:
                    for t in range(1, self._config.retryWait):
                        print(t, end=' ')
                        time.sleep(1)
                    print('')
                    retry_count = retry_count + 1
                else:
                    PixivHelper.print_and_log('error', 'Error at getPixivPage(): {0}'.format(str(sys.exc_info())))
                    raise PixivException("Failed to get page: {0}".format(url), errorCode=PixivException.SERVER_ERROR)
示例#10
0
    def parsePost(self, jsPost):
        ''' Copy the common fields of a Fanbox post mapping onto this object.

            Sets title, cover image, published/updated dates (raw and
            parsed), feeRequired when present, type and like count, and
            flags the post as restricted when its body is None.

            throw PixivException when the post type is not listed in
            FanboxPost._supportedType.
        '''
        self.imageTitle = jsPost["title"]

        cover = jsPost["coverImageUrl"]
        # Issue #930
        if not self.coverImageUrl:
            if cover:
                self.coverImageUrl = _re_fanbox_cover.sub("fanbox", cover)
                self.try_add(cover, self.embeddedFiles)

        published = jsPost["publishedDatetime"]
        self.worksDate = published
        self.worksDateDateTime = datetime_z.parse_datetime(published)
        updated = jsPost["updatedDatetime"]
        self.updatedDate = updated
        self.updatedDateDatetime = datetime_z.parse_datetime(updated)

        if "feeRequired" in jsPost:
            self.feeRequired = jsPost["feeRequired"]

        # Issue #420
        tz = self._tzInfo
        if tz is not None:
            self.worksDateDateTime = self.worksDateDateTime.astimezone(tz)

        self.type = jsPost["type"]
        supported = self.type in FanboxPost._supportedType
        if not supported:
            raise PixivException(
                f"Unsupported post type = {self.type} for post = {self.imageId}",
                errorCode=9999,
                htmlPage=jsPost)

        self.likeCount = int(jsPost["likeCount"])
        if jsPost["body"] is None:
            self.is_restricted = True
示例#11
0
    def getPixivPage(self,
                     url,
                     referer="https://www.pixiv.net",
                     returnParsed=True):
        ''' get page from pixiv and return as parsed BeautifulSoup object or response object.

            url          -- page address, normalised with self.fixUrl() first.
            referer      -- value sent in the Referer request header.
            returnParsed -- True: return the page parsed by BeautifulSoup,
                            False: return the response object itself.

            An HTTP 403/404/503 answer is not an error here: its body is
            returned parsed by BeautifulSoup. Every other failure is retried
            up to self._config.retry times.

            throw PixivException as server error
        '''
        url = self.fixUrl(url)
        retry_count = 0
        while True:
            req = urllib2.Request(url)
            req.add_header('Referer', referer)
            try:
                page = self.open(req)
                if returnParsed:
                    return BeautifulSoup(page.read())
                return page
            except Exception as ex:
                if isinstance(ex, urllib2.HTTPError) and ex.code in [403, 404, 503]:
                    return BeautifulSoup(ex.read())

                if retry_count < self._config.retry:
                    for t in range(1, self._config.retryWait):
                        print(t, end=' ')
                        time.sleep(1)
                    print('')
                    retry_count = retry_count + 1
                else:
                    # str(ex) instead of the deprecated ex.message, which is
                    # empty for most network errors and is not guaranteed to
                    # be a string
                    raise PixivException("Failed to get page: " + str(ex),
                                         errorCode=PixivException.SERVER_ERROR)
示例#12
0
    def ParseMangaImagesNew(self, page, _br):
        ''' Collect the image urls behind the links of the manga section.

            page -- parsed page; searched for <section class="manga"> and
                    <span class="total">.
            _br  -- browser used to fetch each linked page; when None the
                    existing browser of PixivBrowserFactory is used.

            A failure on a single link is printed and that link is skipped.
            A page without a manga section yields no urls.
            When the page has a total counter, self.imageCount is set from it.

            Returns the list of image urls found.

            throw PixivException when the total counter differs from the
            number of urls found.
        '''
        urls = []
        mangaSection = page.find("section", attrs={'class': 'manga'})
        # no manga section => nothing to follow (instead of AttributeError)
        links = mangaSection.findAll('a') if mangaSection is not None else []
        ## /member_illust.php?mode=manga_big&illust_id=46279245&page=0
        if _br is None:
            import PixivBrowserFactory
            _br = PixivBrowserFactory.getExistingBrowser()

        for link in links:
            try:
                href = _br.fixUrl(link["href"])
                # print() with one argument is valid on Python 2 and 3
                print("Fetching big image page: %s" % (href,))
                bigPage = _br.getPixivPage(url=href, referer="http://www.pixiv.net/member_illust.php?mode=manga&illust_id=" + str(self.imageId))

                bigImg = bigPage.find('img')
                imgUrl = bigImg["src"]
                print("Found:  %s" % (imgUrl,))
                urls.append(imgUrl)
                bigImg.decompose()
                bigPage.decompose()
                del bigImg
                del bigPage
            except Exception as ex:
                print(ex)

        total = page.find("span", attrs={'class': 'total'})
        if total is not None:
            self.imageCount = int(total.string)
            if self.imageCount != len(urls):
                raise PixivException("Different images count: " + str(self.imageCount) + " != " + str(len(urls)))

        return urls
示例#13
0
    def open_with_retry(
            self,
            url,
            data=None,
            timeout=mechanize._sockettimeout._GLOBAL_DEFAULT_TIMEOUT,
            retry=0):
        ''' Return the result of self.open(), retrying on non-HTTP failures.

            url, data, timeout -- forwarded to self.open().
            retry -- number of retries; 0 means "use self._config.retry"
                     when a configuration is available.

            HTTP errors are re-raised untouched.

            throw PixivException as server error once the retries are used
            up; the message carries the text of the last exception.
        '''
        retry_count = 0
        if retry == 0 and self._config is not None:
            retry = self._config.retry

        while True:
            try:
                return self.open(url, data, timeout)
            except urllib2.HTTPError:
                raise
            except Exception as ex:
                if retry_count < retry:
                    # without a configuration there is no wait time to read:
                    # retry immediately instead of failing on None
                    retry_wait = self._config.retryWait if self._config is not None else 0
                    for t in range(1, retry_wait):
                        print(t, end=' ')
                        time.sleep(1)
                    print('')
                    retry_count = retry_count + 1
                else:
                    # str(ex) instead of the deprecated ex.message, which is
                    # empty for most network errors and is not guaranteed to
                    # be a string
                    raise PixivException("Failed to get page: " + str(ex),
                                         errorCode=PixivException.SERVER_ERROR)
示例#14
0
    def open_with_retry(self, url, data=None, timeout=60, retry=0):
        ''' Return response object with retry.

            url, data, timeout -- forwarded to self.open().
            retry -- number of retries; 0 means "use self._config.retry"
                     when a configuration is available.

            HTTP errors are re-raised untouched. KeyboardInterrupt and
            SystemExit are not retried and propagate to the caller.

            throw PixivException as server error once the retries are used up.
        '''
        retry_count = 0
        if retry == 0 and self._config is not None:
            retry = self._config.retry

        while True:
            try:
                return self.open(url, data, timeout)
            except urllib.error.HTTPError:
                raise
            except Exception as ex:
                if retry_count < retry:
                    print(ex, end=' ')
                    # without a configuration there is no wait time to read:
                    # retry immediately instead of failing on None
                    retry_wait = self._config.retryWait if self._config is not None else 0
                    for t in range(1, retry_wait):
                        print(t, end=' ')
                        time.sleep(1)
                    print('')
                    retry_count = retry_count + 1
                else:
                    # show the address, not the repr of a Request object
                    temp = url
                    if isinstance(url, urllib.request.Request):
                        temp = url.full_url

                    PixivHelper.print_and_log('error', 'Error at open_with_retry(): {0}'.format(str(sys.exc_info())))
                    raise PixivException("Failed to get page: {0}, please check your internet connection/firewall/antivirus."
                                         .format(temp), errorCode=PixivException.SERVER_ERROR)
示例#15
0
    def parsePost(self, jsPost):
        ''' Copy the common fields of a Fanbox post mapping onto this object.

            Sets title, cover image (also added to self.embeddedFiles when
            not already there), published date (raw and parsed), updated
            date, type and like count, and flags the post as restricted when
            its body is None.

            throw PixivException when the post type is not listed in
            FanboxPost._supportedType.
        '''
        self.imageTitle = jsPost["title"]

        cover = jsPost["coverImageUrl"]
        self.coverImageUrl = cover
        if not (cover is None or cover in self.embeddedFiles):
            self.embeddedFiles.append(cover)

        published = jsPost["publishedDatetime"]
        self.worksDate = published
        self.worksDateDateTime = datetime_z.parse_datetime(published)
        # Issue #420
        tz = self._tzInfo
        if tz is not None:
            self.worksDateDateTime = self.worksDateDateTime.astimezone(tz)

        self.updatedDatetime = jsPost["updatedDatetime"]
        self.type = jsPost["type"]
        supported = self.type in FanboxPost._supportedType
        if not supported:
            raise PixivException(
                "Unsupported post type = {0} for post = {1}".format(
                    self.type, self.imageId),
                errorCode=9999,
                htmlPage=jsPost)

        self.likeCount = int(jsPost["likeCount"])
        if jsPost["body"] is None:
            self.is_restricted = True
示例#16
0
    def ParseImages(self, page, mode=None, _br=None):
        ''' Fill self.imageUrls from the page according to the image mode.

            page -- page to parse; must not be None.
            mode -- 'big' / 'bigNew', 'manga' or 'ugoira_view'; defaults to
                    self.imageMode when None. Any other value parses nothing.
            _br  -- forwarded to self.CheckMangaType() in manga mode.

            Returns self.imageUrls.

            throw PixivException when page is None or no url was collected.
        '''
        if page is None:
            raise PixivException('No page given', errorCode=PixivException.NO_PAGE_GIVEN)
        if mode is None:
            mode = self.imageMode

        # drop the urls of a previous parse, keeping the same list object
        del self.imageUrls[:]
        if mode in ('big', 'bigNew'):
            self.imageUrls.append(self.ParseBigImages(page))
        elif mode == 'manga':
            self.imageUrls = self.CheckMangaType(page, _br)
        elif mode == 'ugoira_view':
            self.imageUrls.append(self.ParseUgoira(page))

        if len(self.imageUrls) == 0:
            raise PixivException('No images found for: ' + str(self.imageId), errorCode=PixivException.NO_IMAGES)
        return self.imageUrls
示例#17
0
    def parse(self):
        ''' Decode self.novel_json_str and copy the novel details onto self.

            Sets title, content, artist id, counters, series data, flags,
            language, the parsed create/upload dates (converted to
            self._tzInfo when set), the formatted works date and the tag
            lists. An extra "original" tag is appended for original works.

            throw PixivException when the payload reports an error.
        '''
        js = json.loads(self.novel_json_str)
        if js["error"]:
            raise PixivException("Cannot get novel details",
                                 errorCode=PixivException.UNKNOWN_IMAGE_ERROR,
                                 htmlPage=self.novel_json_str)

        root = js["body"]

        self.imageTitle = root["title"]
        self.content = root["content"]
        self.artist_id = root["userId"]
        self.bookmark_count = root["bookmarkCount"]
        self.image_response_count = root["imageResponseCount"]

        series_nav = root["seriesNavData"]
        self.seriesNavData = series_nav
        if series_nav is not None:
            self.seriesId = series_nav["seriesId"]
            self.seriesOrder = series_nav["order"]

        self.isOriginal = root["isOriginal"]
        self.isBungei = root["isBungei"]
        self.language = root["language"]
        self.xRestrict = root["xRestrict"]

        # datetime
        created = root["createDate"]
        self.worksDateDateTime = datetime_z.parse_datetime(created)
        self.uploadDate = datetime_z.parse_datetime(root["uploadDate"])
        self.js_createDate = created  # store for json file
        tz = self._tzInfo
        if tz is not None:
            self.worksDateDateTime = self.worksDateDateTime.astimezone(tz)
            self.uploadDate = self.uploadDate.astimezone(tz)

        # 2018-07-22, else configured in config.ini
        date_format = self.dateFormat or "%Y-%m-%d"
        self.worksDate = self.worksDateDateTime.strftime(date_format)

        # tags
        self.imageTags = []
        self.tags = []
        tag_root = root["tags"]
        if tag_root is not None:
            for entry in tag_root["tags"]:
                self.imageTags.append(entry["tag"])
                self.tags.append(PixivTagData(entry["tag"], entry))

        # append original tag
        if root["isOriginal"]:
            self.imageTags.append("オリジナル")
            original_tag = {
                "tag": "オリジナル",
                "locked": True,
                "deletable": False,
                "userId": "",
                "romaji": "original",
                "translation": {
                    "en": "original"
                }
            }
            self.tags.append(PixivTagData(original_tag["tag"], original_tag))
示例#18
0
    def __init__(self, page):
        ''' Decode the page and parse the supported artists from its body.

            Nothing is parsed when the body is None.

            throw PixivException when the decoded page reports an error.
        '''
        js = demjson.decode(page)

        if js["error"]:
            raise PixivException("Error when requesting Fanbox", 9999, page)

        body = js["body"]
        if body is None:
            return
        self.parseSupportedArtists(body)
示例#19
0
 def getEmbedData(self, embedData, jsPost=None):
     ''' Return the html link for an embedded twitter post.

         embedData -- mapping with "serviceProvider" and "contentId".
         jsPost    -- optional post data attached to the raised exception
                      (the name was previously undefined in this method).

         throw PixivException for any provider other than "twitter".
     '''
     if embedData["serviceProvider"] == "twitter":
         return "<a href='https://twitter.com/_/status/{0}'>twitter post: {0}</a>".format(embedData["contentId"])

     raise PixivException("Unsupported embed provider = {0} for post = {1}".format(embedData["serviceProvider"],
                                                                                   self.imageId),
                          errorCode=9999,
                          htmlPage=jsPost)
示例#20
0
    def parse_series_content(self, page_info, current_page):
        ''' Add the series contents of one json page to this object.

            page_info    -- json text of the page.
            current_page -- key under which the raw text is kept in
                            self.series_list_str.

            The decoded "seriesContents" entries are appended to
            self.series_list.

            throw PixivException when the payload reports an error.
        '''
        decoded = json.loads(page_info)
        if decoded["error"]:
            raise PixivException("Cannot get novel series content details",
                                 errorCode=PixivException.UNKNOWN_IMAGE_ERROR,
                                 htmlPage=page_info)

        contents = decoded["body"]["seriesContents"]
        self.series_list.extend(contents)
        self.series_list_str[current_page] = page_info
示例#21
0
    def parseList(filename, rootDir=None):
        '''read list.txt and return the list of PixivListItem

            filename -- path of the list file.
            rootDir  -- replacement for the %root% placeholder of a path;
                        the placeholder is removed when rootDir is None.

            Each useful line is "<member id> [<path>]". Lines starting with
            '#' and blank lines are skipped. Parsing stops at the first
            invalid line: the problem is logged with its line number and the
            items read so far are returned.

            throw PixivException when the file does not exist.
        '''
        if not os.path.exists(filename):
            raise PixivException("File doesn't exists or no permission to read: " + filename, errorCode=PixivException.FILE_NOT_EXISTS_OR_NO_WRITE_PERMISSION)

        parsed_items = []
        reader = PixivHelper.OpenTextFile(filename)
        # bound up front so the error handlers can always report them
        lineNo = 0
        originalLine = ""
        try:
            # count every line of the file, including skipped ones, so the
            # reported number matches what an editor shows
            for lineNo, line in enumerate(reader, 1):
                originalLine = line
                if line.startswith('#') or len(line.strip()) == 0:
                    continue
                line = PixivHelper.toUnicode(line).strip()
                items = line.split(" ", 1)

                member_id = int(items[0])
                path = ""
                if len(items) > 1:
                    path = items[1].strip()

                    path = path.replace('\"', '')
                    if rootDir is not None:
                        path = path.replace('%root%', rootDir)
                    else:
                        path = path.replace('%root%', '')

                    path = os.path.abspath(path)
                    # have drive letter
                    if re.match(r'[a-zA-Z]:', path):
                        dirpath = path.split(os.sep, 1)
                        dirpath[1] = PixivHelper.sanitizeFilename(dirpath[1], None)
                        path = os.sep.join(dirpath)
                    else:
                        path = PixivHelper.sanitizeFilename(path, rootDir)

                    path = path.replace('\\\\', '\\')
                    path = path.replace('\\', os.sep)

                parsed_items.append(PixivListItem(member_id, path))
        except UnicodeDecodeError:
            PixivHelper.GetLogger().exception("PixivListItem.parseList(): Invalid value when parsing list")
            PixivHelper.printAndLog('error', 'Invalid value: {0} at line {1}, try to save the list.txt in UTF-8.'.format(originalLine, lineNo))
        except Exception:
            # Exception, not a bare except: Ctrl+C must still interrupt
            PixivHelper.GetLogger().exception("PixivListItem.parseList(): Invalid value when parsing list")
            PixivHelper.printAndLog('error', 'Invalid value: {0} at line {1}'.format(originalLine, lineNo))
        finally:
            reader.close()

        return parsed_items
示例#22
0
    def __init__(self, artist_id, page):
        ''' Store the artist id and parse the posts found in the page.

            artist_id -- converted to int and kept as self.artistId.
            page      -- text decoded with demjson; its body is handed to
                         self.parsePosts() unless it is None.

            throw PixivException when the decoded page reports an error.
        '''
        self.artistId = int(artist_id)
        js = demjson.decode(page)

        if js["error"]:
            # self.artistId: the former bare name `artistId` does not exist
            # and raised NameError instead of this exception
            raise PixivException("Error when requesting Fanbox artist: {0}".format(self.artistId), 9999, page)

        if js["body"] is not None:
            self.parsePosts(js["body"])
示例#23
0
 def parse(self):
     ''' Decode self.series_str and keep the series total and title.

         throw PixivException when the payload reports an error.
     '''
     decoded = json.loads(self.series_str)
     if decoded["error"]:
         raise PixivException("Cannot get novel series content details",
                              errorCode=PixivException.UNKNOWN_IMAGE_ERROR,
                              htmlPage=self.series_str)
     # from publishedContentCount or total or displaySeriesContentCount ????
     self.total = decoded["body"]["total"]
     self.series_name = decoded["body"]["title"]
示例#24
0
    def __init__(self, mid=0, page=None, fromImage=False):
        ''' Validate the given page and parse the artist data from it.

            mid       -- accepted but not used by this constructor.
            page      -- page to inspect; nothing happens when it is None.
            fromImage -- True when the page comes from an image page: the
                         image list is then not parsed.

            After parsing, self.haveImages tells whether self.imageList is
            non-empty and self.CheckLastPage() is run on the page.

            throw PixivException when the page shows that the user is not
            logged in, the member does not exist or is suspended, or a
            member/server error message is found.
        '''
        if page is None:
            return

        if self.IsNotLoggedIn(page):
            raise PixivException('Not Logged In!',
                                 errorCode=PixivException.NOT_LOGGED_IN)

        if self.IsUserNotExist(page):
            raise PixivException(
                'User ID not exist/deleted!',
                errorCode=PixivException.USER_ID_NOT_EXISTS)

        if self.IsUserSuspended(page):
            raise PixivException(
                'User Account is Suspended!',
                errorCode=PixivException.USER_ID_SUSPENDED)

        ## detect if there is any other error
        errorMessage = self.IsErrorExist(page)
        if errorMessage is not None:
            raise PixivException(
                'Member Error: ' + errorMessage,
                errorCode=PixivException.OTHER_MEMBER_ERROR)

        ## detect if there is server error
        errorMessage = self.IsServerErrorExist(page)
        if errorMessage is not None:
            raise PixivException('Member Error: ' + errorMessage,
                                 errorCode=PixivException.SERVER_ERROR)

        ## detect if image count != 0
        if not fromImage:
            self.ParseImages(page)

        ## parse artist info
        self.ParseInfo(page, fromImage)

        ## check if no images
        self.haveImages = len(self.imageList) > 0

        ## check if the last page
        self.CheckLastPage(page)
示例#25
0
    def __init__(self, manga_series_id: int, current_page: int, payload: str):
        ''' Keep the series id and page number, then parse the json payload.

            Nothing is parsed when payload is None; otherwise its body is
            handed to self.parse_info().

            throw PixivException carrying the payload message when the
            payload reports an error.
        '''
        self.manga_series_id = manga_series_id
        self.current_page = current_page

        if payload is None:
            return

        decoded = json.loads(payload)
        if decoded["error"]:
            raise PixivException(message=decoded["message"], errorCode=PixivException.OTHER_ERROR, htmlPage=payload)
        self.parse_info(decoded["body"])
示例#26
0
    def __init__(self, js_str, type_mode):
        ''' Decode the json text and keep its last id and illust list.

            js_str    -- json text with "error", "message" and "body".
            type_mode -- stored unchanged as self.type_mode.

            throw PixivException carrying the payload message when the
            payload reports an error.
        '''
        decoded = json.loads(js_str)

        if decoded["error"]:
            raise PixivException(decoded["message"],
                                 errorCode=PixivException.OTHER_ERROR)

        self.last_id = decoded["body"]["lastId"]
        self.images = decoded["body"]["illusts"]
        self.type_mode = type_mode
示例#27
0
    def __init__(self, iid=0, page=None, parent=None, fromBookmark=False, bookmark_count=-1, image_response_count=-1,
                 dateFormat=None):
        ''' Store the given values and, when a page is given, validate and parse it.

            iid            -- image id, kept as self.imageId.
            page           -- page to inspect; nothing is parsed when None.
            parent         -- artist object; built from the page when None.
            fromBookmark   -- kept on the instance; also selects how
                              self.originalArtist is filled.
            bookmark_count -- kept on the instance.
            image_response_count -- accepted but not used by this constructor.
            dateFormat     -- kept on the instance.

            throw PixivException when the page is recognised as one of the
            known error pages or carries an image/server error message.
        '''
        self.artist = parent
        self.fromBookmark = fromBookmark
        self.bookmark_count = bookmark_count
        self.imageId = iid
        self.imageUrls = []
        self.dateFormat = dateFormat
        self.descriptionUrlList = []

        if page is None:
            return

        ## check is error page; the checks run in this order and the first
        ## one that matches raises
        page_checks = (
            (self.IsNotLoggedIn, 'Not Logged In!', PixivException.NOT_LOGGED_IN),
            (self.IsNeedPermission, 'Not in MyPick List, Need Permission!', PixivException.NOT_IN_MYPICK),
            (self.IsNeedAppropriateLevel, 'Public works can not be viewed by the appropriate level!',
             PixivException.NO_APPROPRIATE_LEVEL),
            (self.IsDeleted, 'Image not found/already deleted!', PixivException.IMAGE_DELETED),
            (self.IsGuroDisabled, 'Image is disabled for under 18, check your setting page (R-18/R-18G)!',
             PixivException.R_18_DISABLED),
            ## check if there is any other error
            (self.IsErrorPage, 'An error occurred!', PixivException.OTHER_IMAGE_ERROR),
        )
        for matches, message, code in page_checks:
            if matches(page):
                raise PixivException(message, errorCode=code)

        ## detect if there is any other error
        errorMessage = self.IsErrorExist(page)
        if errorMessage is not None:
            raise PixivException('Image Error: ' + errorMessage, errorCode=PixivException.UNKNOWN_IMAGE_ERROR)

        ## detect if there is server error
        errorMessage = self.IsServerErrorExist(page)
        if errorMessage is not None:
            raise PixivException('Image Error: ' + errorMessage, errorCode=PixivException.SERVER_ERROR)

        ## parse artist information
        if self.artist is None:
            self.artist = PixivArtist(page=page, fromImage=True)

        if fromBookmark and self.originalArtist is None:
            self.originalArtist = PixivArtist(page=page, fromImage=True)
        else:
            self.originalArtist = self.artist

        ## parse image information
        self.ParseInfo(page)
        self.ParseTags(page)
        self.ParseWorksData(page)
示例#28
0
    def __init__(self, iid=0, page=None, parent=None, fromBookmark=False,
                 bookmark_count=-1, image_response_count=-1, dateFormat=None, tzInfo=None):
        """Initialise the image record and, when ``page`` is given, parse it.

        Args:
            iid: Stored as ``imageId``.
            page: Raw page text handed to ``parseJs``; when ``None`` only the
                plain attributes are set.
            parent: Stored as ``artist``; when ``None`` and a page is given,
                a ``PixivArtist`` is built from the first key of
                ``payload["user"]``.
            fromBookmark: Stored as ``fromBookmark``; also takes part in the
                choice of ``originalArtist``.
            bookmark_count: Stored as ``bookmark_count``.
            image_response_count: Accepted but not used in this method.
            dateFormat: Stored as ``dateFormat``.
            tzInfo: Stored as ``_tzInfo``.

        Raises:
            PixivException: When ``parseJs`` yields no payload. The error code
                comes from the first matching error-page check, or is
                ``UNKNOWN_IMAGE_ERROR`` when no check matches.
        """
        self.artist = parent
        self.fromBookmark = fromBookmark
        self.bookmark_count = bookmark_count
        self.imageId = iid
        self.imageUrls = []
        self.dateFormat = dateFormat
        self.descriptionUrlList = []
        self._tzInfo = tzInfo

        if page is not None:

            # Issue #556
            payload = parseJs(page)

            # check error
            if payload is None:
                parsed = BeautifulSoup(page, features="html5lib")
                if self.IsNotLoggedIn(parsed):
                    raise PixivException('Not Logged In!', errorCode=PixivException.NOT_LOGGED_IN, htmlPage=page)
                if self.IsNeedPermission(parsed):
                    raise PixivException('Not in MyPick List, Need Permission!', errorCode=PixivException.NOT_IN_MYPICK, htmlPage=page)
                if self.IsNeedAppropriateLevel(parsed):
                    raise PixivException('Public works can not be viewed by the appropriate level!',
                                         errorCode=PixivException.NO_APPROPRIATE_LEVEL, htmlPage=page)
                if self.IsDeleted(parsed):
                    raise PixivException('Image not found/already deleted!', errorCode=PixivException.IMAGE_DELETED, htmlPage=page)
                if self.IsGuroDisabled(parsed):
                    raise PixivException('Image is disabled for under 18, check your setting page (R-18/R-18G)!',
                                         errorCode=PixivException.R_18_DISABLED, htmlPage=page)
                # detect if there is any other error
                errorMessage = self.IsErrorExist(parsed)
                if errorMessage is not None:
                    raise PixivException('Image Error: ' + str(errorMessage), errorCode=PixivException.UNKNOWN_IMAGE_ERROR, htmlPage=page)
                # detect if there is server error
                errorMessage = self.IsServerErrorExist(parsed)
                if errorMessage is not None:
                    raise PixivException('Image Error: ' + str(errorMessage), errorCode=PixivException.SERVER_ERROR, htmlPage=page)
                # Release the parsed tree; only the raw page is needed below.
                parsed.decompose()
                del parsed

                # No payload and no recognised error page: everything below
                # dereferences the payload, so fail with a domain error here
                # instead of a TypeError on None.
                raise PixivException('Image Error: Cannot parse image data from page!',
                                     errorCode=PixivException.UNKNOWN_IMAGE_ERROR, htmlPage=page)

            # parse artist information
            if parent is None:
                temp_artist_id = list(payload["user"].keys())[0]
                self.artist = PixivArtist(temp_artist_id, page, fromImage=True)

            # NOTE(review): self.originalArtist is read before this method
            # assigns it - presumably a class-level default exists; confirm.
            if fromBookmark and self.originalArtist is None:
                assert(self.artist is not None)
                self.originalArtist = PixivArtist(page=page, fromImage=True)
                print("From Artist Bookmark: {0}".format(self.artist.artistId))
                print("Original Artist: {0}".format(self.originalArtist.artistId))
            else:
                self.originalArtist = self.artist

            # parse image
            self.ParseInfo(payload)
示例#29
0
 def ParseToken(self, page, fromImage=False):
     """Extract the artist token from the page's stacc feed link.

     Looks at every ``<a class="tab-feed">`` element returned by
     ``page.findAll`` and uses the first whose ``href`` contains ``"stacc/"``
     after its first character; the token is the last ``/``-separated part
     of that ``href``.

     Args:
         page: Object exposing ``findAll`` (assumed to be a parsed
             BeautifulSoup page - confirm against callers).
         fromImage: Accepted but not used in this method.

     Returns:
         The token, also stored on ``self.artistToken``; ``None`` when no
         matching link is found (``self.artistToken`` is then left untouched).

     Raises:
         PixivException: When reading the links fails; the original error is
             chained as the cause.
     """
     try:
         # get the token from stacc feed
         tabFeeds = page.findAll('a', attrs={'class':'tab-feed'})
         for a in tabFeeds or ():
             if str(a["href"]).find("stacc/") > 0:
                 self.artistToken = a["href"].split("/")[-1]
                 return self.artistToken
     except Exception as ex:
         # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
         # propagate instead of being reported as a parse failure.
         raise PixivException('Cannot parse artist token, possibly different image structure.', errorCode = PixivException.PARSE_TOKEN_DIFFERENT_IMAGE_STRUCTURE) from ex
示例#30
0
    def getEmbedData(self, embedData, jsPost) -> str:
        """Render the embed text for ``embedData`` using ``content_provider.json``.

        The config file is looked up next to ``sys.executable`` first and then
        in the current working directory. Its ``"embedConfig"`` section is
        indexed by ``embedData["serviceProvider"]``.

        Args:
            embedData: Mapping holding ``"serviceProvider"`` and one of the
                keys listed for that provider in the config.
            jsPost: Attached as ``htmlPage`` to exceptions raised for an
                unsupported provider or an empty content id.

        Returns:
            An empty string when the provider is flagged ``"ignore"``;
            otherwise the provider's ``"format"`` string filled with the
            content id.

        Raises:
            PixivException: When the config file is missing, the provider is
                not configured, or no non-empty content id is found.
        """
        # Issue #881
        candidates = (
            os.path.dirname(sys.executable) + os.sep + "content_provider.json",
            "./content_provider.json",
        )
        for candidate in candidates:
            content_provider_path = os.path.abspath(candidate)
            if os.path.exists(content_provider_path):
                break
        else:
            # Neither location exists; the message reports the last one tried.
            raise PixivException(
                f"Missing content_provider.json, please get it from https://github.com/Nandaka/PixivUtil2/blob/master/content_provider.json! Expected location => {content_provider_path}",
                errorCode=PixivException.MISSING_CONFIG,
                htmlPage=None)

        providers = demjson.decode_file(content_provider_path)["embedConfig"]
        provider_name = embedData["serviceProvider"]

        if provider_name not in providers:
            msg = "Unsupported embed provider = {0} for post = {1}, please update content_provider.json."
            raise PixivException(msg.format(embedData["serviceProvider"],
                                            self.imageId),
                                 errorCode=9999,
                                 htmlPage=jsPost)

        provider = providers[provider_name]
        if provider["ignore"]:
            return ""

        # The first configured key that is present in embedData supplies the id.
        content_id = next(
            (embedData[key] for key in provider["keys"] if key in embedData),
            None)

        if content_id is None or len(content_id) == 0:
            msg = "Empty content_id for embed provider = {0} for post = {1}, please update content_provider.json."
            raise PixivException(msg.format(embedData["serviceProvider"],
                                            self.imageId),
                                 errorCode=9999,
                                 htmlPage=jsPost)

        return provider["format"].format(content_id)