def ParseWorksData(self, page):
    """Read the works date, resolution and tools from the page's ``meta`` node.

    Sets the following attributes on ``self``:

    * ``worksDate`` -- text of the first ``li`` under the ``meta`` node,
      converted with ``PixivHelper.toUnicode``.
    * ``worksDateDateTime`` -- ``worksDate`` parsed by
      ``PixivHelper.ParseDateTime`` using ``self.dateFormat``.
    * ``worksResolution`` -- text of the second ``li`` with the ``×`` sign
      replaced by a plain ``x``.
    * ``worksTools`` -- when a non-empty ``tools`` node exists, the text of
      each of its ``li`` items is appended (space separated) to the current
      value, and the result is stripped.

    :param page: parsed HTML document exposing ``find`` / ``findAll``.

    There is no guard for a missing ``meta`` node or for fewer than two
    ``li`` items; such a page fails on the corresponding lookup.
    """
    # Sample contents of the meta list (date | size or page count | tools):
    # 07/22/2011 03:09|512×600|RETAS STUDIO
    # 07/26/2011 00:30|Manga 39P|ComicStudio 鉛筆 つけペン
    # 1/05/2011 07:09|723×1023|Photoshop SAI [ R-18 ]
    # 2013年3月16日 06:44 | 800×1130 | Photoshop ComicStudio | R-18
    # 2013年12月14日 19:00 855×1133 PhotoshopSAI

    # Look the meta node up once and reuse it for both the <li> list and
    # the tools lookup.
    meta = page.find(attrs={'class': 'meta'})
    items = meta.findAll('li')

    # NOTE(review): sys.stdin.encoding may be None when stdin is not a
    # terminal -- confirm PixivHelper.toUnicode copes with that.
    self.worksDate = PixivHelper.toUnicode(items[0].string, encoding=sys.stdin.encoding)
    self.worksDateDateTime = PixivHelper.ParseDateTime(self.worksDate, self.dateFormat)
    self.worksResolution = unicode(items[1].string).replace(u'×', u'x')

    toolsNode = meta.find(attrs={'class': 'tools'})
    # Keep the explicit length test: it checks the node's size, which is not
    # the same thing as the node's truthiness.
    if toolsNode is not None and len(toolsNode) > 0:
        toolNames = [unicode(tool.string) for tool in toolsNode.findAll('li')]
        # The existing value stays as the first piece so repeated calls
        # accumulate exactly as chained concatenation would.
        self.worksTools = ' '.join([self.worksTools] + toolNames).strip()
def ParseInfo(self, page):
    """Populate this image's fields from a parsed illustration page.

    Sets, in this order: ``imageUrls``, ``imageCount``, ``imageMode``,
    ``imageTitle``, ``imageCaption`` (only when a description block is
    present), ``jd_rtv``, ``jd_rtc``, ``jd_rtt``, ``imageTags``,
    ``worksDate``, ``worksDateDateTime`` and ``worksTools`` (only when at
    least one tool entry is present).

    :param page: parsed HTML document exposing ``find`` / ``findAll``.

    Lookups are not guarded: a page missing the title, counters, tag
    container or date element fails on the corresponding access.
    """
    # --- image urls -------------------------------------------------------
    self.imageUrls = []
    containers = page.findAll(
        "div", attrs={"class": "illust-zoom-in thumbnail-container"})
    self.imageUrls.extend(
        [container["data-original-src"] for container in containers])

    # --- mode, decided by how many image containers were found -------------
    self.imageCount = len(self.imageUrls)
    if self.imageCount > 1:
        self.imageMode = "manga"
    elif self.imageCount == 1:
        self.imageMode = "big"
    else:
        # ugoira: the url comes from the player's metadata attribute.
        # imageCount is left at 0 here; it is not recomputed after the append.
        player = page.find("div", attrs={"class": "ugoira player-container"})
        self.imageMode = "ugoira_view"
        self.imageUrls.append(self.ParseUgoira(player["data-ugoira-meta"]))

    # --- title / caption ---------------------------------------------------
    titleNodes = page.findAll("div", attrs={"class": "title-container"})
    self.imageTitle = titleNodes[0].text
    captionNodes = page.findAll(
        "div", attrs={"class": "description-text ui-expander-target"})
    if captionNodes:
        self.imageCaption = captionNodes[0].text

    # --- view count --------------------------------------------------------
    viewNode = page.find(attrs={'class': 'react-count'})
    self.jd_rtv = int(viewNode.text)

    # --- like count --------------------------------------------------------
    # e.g. react-count _clickable illust-bookmark-count-59521621 count like-count
    likeClass = re.compile(
        r"react-count _clickable illust-bookmark-count-{0} count like-count.*"
        .format(self.imageId))
    likeNode = page.find(attrs={'class': likeClass})
    self.jd_rtc = int(likeNode.text)

    # not available on this page, so the like count is reused
    self.jd_rtt = self.jd_rtc

    # --- tags --------------------------------------------------------------
    self.imageTags = []
    # e.g. _tag-container tags illust-59521621
    containerClass = "_tag-container tags illust-{0}".format(self.imageId)
    tagContainer = page.find("div", attrs={"class": containerClass})

    # R-18 is marked by a special node rather than an ordinary tag link
    if page.findAll(attrs={'class': 'tag r-18'}):
        self.imageTags.append("R-18")

    tagAnchors = tagContainer.findAll(
        "a", attrs={"class": re.compile(r"tag.*")})
    self.imageTags.extend(
        [anchor["data-activity-tag_name"] for anchor in tagAnchors])

    # --- date --------------------------------------------------------------
    self.worksDate = page.find("li", attrs={"class": "datetime"}).text
    self.worksDateDateTime = PixivHelper.ParseDateTime(
        self.worksDate, self.dateFormat)

    # --- resolution: not parsed here ----------------------------------------

    # --- tools -------------------------------------------------------------
    toolNames = [node.text for node in page.findAll("li", attrs={"class": "tool"})]
    if toolNames:
        self.worksTools = ", ".join(toolNames)