Пример #1
0
    def ParseWorksData(self, page):
        """Read date, resolution and tools out of the page's 'meta' element.

        `page` must offer ``find``/``findAll`` (presumably a BeautifulSoup
        tree -- confirm against the caller). The method writes
        ``worksDate``, ``worksDateDateTime`` and ``worksResolution`` and,
        when a non-empty 'tools' element is present, extends and strips
        ``worksTools``.
        """
        # Sample renderings of the meta list:
        #   07/22/2011 03:09|512×600|RETAS STUDIO
        #   07/26/2011 00:30|Manga 39P|ComicStudio 鉛筆 つけペン
        #   1/05/2011 07:09|723×1023|Photoshop SAI  [ R-18 ]
        #   2013年3月16日 06:44 | 800×1130 | Photoshop ComicStudio | R-18
        #   2013年12月14日 19:00 855×1133 PhotoshopSAI
        meta = page.find(attrs={'class': 'meta'})
        entries = meta.findAll('li')

        # First <li>: the date text, kept raw and also parsed.
        self.worksDate = PixivHelper.toUnicode(entries[0].string, encoding=sys.stdin.encoding)
        self.worksDateDateTime = PixivHelper.ParseDateTime(self.worksDate, self.dateFormat)

        # Second <li>: the resolution, with the multiplication sign replaced
        # by a plain 'x'.
        resolution = unicode(entries[1].string)
        self.worksResolution = resolution.replace(u'×', u'x')

        tool_list = meta.find(attrs={'class': 'tools'})
        if tool_list is None or len(tool_list) == 0:
            return

        # Append every tool name to the current value, space separated.
        names = [unicode(item.string) for item in tool_list.findAll('li')]
        if names:
            self.worksTools = self.worksTools + ' ' + ' '.join(names)
        self.worksTools = self.worksTools.strip()
Пример #2
0
    def ParseInfo(self, page):
        """Populate this object's image fields from a parsed page.

        `page` must offer ``find``/``findAll`` (presumably a BeautifulSoup
        tree -- confirm against the caller). The method assigns
        ``imageUrls``, ``imageMode``, ``imageTitle``, ``jd_rtv``,
        ``jd_rtc``, ``jd_rtt``, ``imageTags``, ``worksDate`` and
        ``worksDateDateTime``. ``imageCount`` is assigned only when
        thumbnail containers are found, ``imageCaption`` only when a
        description exists, and ``worksTools`` only when tools are listed.
        """
        self.imageUrls = []
        thumbs = page.findAll(
            "div", attrs={"class": "illust-zoom-in thumbnail-container"})

        if not thumbs:
            # No thumbnail containers: treat the work as ugoira.
            # Note that imageCount is not assigned on this path.
            player = page.find("div",
                               attrs={"class": "ugoira player-container"})
            self.imageMode = "ugoira_view"
            self.imageUrls.append(
                self.ParseUgoira(player["data-ugoira-meta"]))
        else:
            self.imageUrls.extend(
                thumb["data-original-src"] for thumb in thumbs)
            self.imageCount = len(self.imageUrls)
            # At least one URL was collected here, so the count is either
            # exactly one (single image) or more (manga).
            self.imageMode = "big" if self.imageCount == 1 else "manga"

        # Title is mandatory (first match is indexed directly); caption is
        # optional.
        title_nodes = page.findAll("div", attrs={"class": "title-container"})
        self.imageTitle = title_nodes[0].text
        caption_nodes = page.findAll(
            "div", attrs={"class": "description-text ui-expander-target"})
        if caption_nodes:
            self.imageCaption = caption_nodes[0].text

        # View count: first element carrying the 'react-count' class.
        view_node = page.find(attrs={'class': 'react-count'})
        self.jd_rtv = int(view_node.text)

        # Like count; the class looks like:
        #   react-count _clickable illust-bookmark-count-59521621 count like-count
        like_class = re.compile(
            r"react-count _clickable illust-bookmark-count-{0} count like-count.*"
            .format(self.imageId))
        like_node = page.find(attrs={'class': like_class})
        self.jd_rtc = int(like_node.text)
        # Marked "not available" in the original; it mirrors the like count.
        self.jd_rtt = self.jd_rtc

        # Tags live in a container classed like:
        #   _tag-container tags illust-59521621
        self.imageTags = []
        container_class = "_tag-container tags illust-{0}".format(self.imageId)
        tag_container = page.find("div", attrs={"class": container_class})
        # R-18 is carried by a dedicated node and is listed first.
        if page.findAll(attrs={'class': 'tag r-18'}):
            self.imageTags.append("R-18")
        tag_links = tag_container.findAll(
            "a", attrs={"class": re.compile(r"tag.*")})
        self.imageTags.extend(
            link["data-activity-tag_name"] for link in tag_links)

        # Date: raw text plus its parsed form.
        date_node = page.find("li", attrs={"class": "datetime"})
        self.worksDate = date_node.text
        self.worksDateDateTime = PixivHelper.ParseDateTime(
            self.worksDate, self.dateFormat)

        # Resolution is not parsed by this method.

        # Tools: comma-separated list, left untouched when none are listed.
        tool_names = [
            node.text for node in page.findAll("li", attrs={"class": "tool"})
        ]
        if tool_names:
            self.worksTools = ", ".join(tool_names)