def checkForOpenGraphTag(self):
    """\
    Build the main image from the first usable og:image meta tag.

    Looks up the <meta property="og:image"> elements in the article's raw
    document and takes the first one whose ``content`` attribute is
    non-empty. If self.getLocallyStoredImage returns a truthy result for
    that source, its bytes, height and width are copied onto the image.

    Returns the populated Image (extraction type "opengraph", confidence
    score 100), or None when no tag carries a value.
    """
    ogTags = Parser.getElementsByTag(
        self.article.rawDoc, tag='meta', attr='property', value='og:image')
    for tag in ogTags:
        src = Parser.getAttribute(tag, attr='content')
        if not src:
            # tag without a usable content value; try the next one
            continue

        image = Image()
        image.imageSrc = src
        image.imageExtractionType = "opengraph"
        image.confidenceScore = 100

        stored = self.getLocallyStoredImage(image.imageSrc)
        if stored:
            image.bytes = stored.bytes
            image.height = stored.height
            image.width = stored.width
        return image
    return None
def checkForLinkTag(self):
    """\
    Build the main image from the first usable <link rel="image_src"> tag.

    Looks up the <link rel="image_src"> elements in the article's raw
    document and takes the first one whose ``href`` attribute is non-empty.
    If self.getLocallyStoredImage returns a truthy result for that source,
    its bytes, height and width are copied onto the image.

    Returns the populated Image (extraction type "linktag", confidence
    score 100), or None when no tag carries a value.
    """
    linkTags = Parser.getElementsByTag(
        self.article.rawDoc, tag='link', attr='rel', value='image_src')
    for tag in linkTags:
        src = Parser.getAttribute(tag, attr='href')
        if not src:
            # link without a usable href; try the next one
            continue

        image = Image()
        image.imageSrc = src
        image.imageExtractionType = "linktag"
        image.confidenceScore = 100

        stored = self.getLocallyStoredImage(image.imageSrc)
        if stored:
            image.bytes = stored.bytes
            image.height = stored.height
            image.width = stored.width
        return image
    return None
def checkForKnownElements(self):
    """\
    Look for an image inside DOM containers known to hold article images.

    The candidate container names are the entries of KNOWN_IMG_DOM_NAMES
    plus, when the article's clean domain has an entry in
    self.customSiteMapping, the '|'-separated names configured for it.
    Each name is tried first as an element id and then as a class; the
    first <img> inside a matching container becomes the candidate.

    Returns an Image (extraction type "known", confidence score 90) built
    from the candidate's ``src``, or None when no container yields an <img>.

    TODO: enable this to use a series of settings files so people can
    define what the image ids/classes are on specific sites
    """
    # Work on a per-call copy. Appending to the module-level
    # KNOWN_IMG_DOM_NAMES made it grow on every call and leaked one site's
    # custom names into lookups for every other domain.
    candidateNames = list(KNOWN_IMG_DOM_NAMES)
    domain = self.getCleanDomain()
    if domain in self.customSiteMapping:
        candidateNames.extend(self.customSiteMapping.get(domain).split('|'))

    knownImage = None
    # NOTE(review): there is no break, so when several names match, the
    # last matching one wins -- kept as in the original; confirm intended.
    for knownName in candidateNames:
        known = Parser.getElementById(self.article.rawDoc, knownName)
        if not known:
            # not found by id: fall back to a class lookup, first hit only
            known = Parser.getElementsByTag(self.article.rawDoc,
                                            attr='class', value=knownName)
            if known:
                known = known[0]
        if known:
            imgTags = Parser.getElementsByTag(known, tag='img')
            if imgTags:
                knownImage = imgTags[0]

    if knownImage is None:
        return None

    knownImgSrc = Parser.getAttribute(knownImage, attr='src')
    mainImage = Image()
    mainImage.imageSrc = self.buildImagePath(knownImgSrc)
    mainImage.imageExtractionType = "known"
    mainImage.confidenceScore = 90
    locallyStoredImage = self.getLocallyStoredImage(mainImage.imageSrc)
    if locallyStoredImage:
        mainImage.bytes = locallyStoredImage.bytes
        mainImage.height = locallyStoredImage.height
        mainImage.width = locallyStoredImage.width
    return mainImage
def checkForOpenGraphTag(self):
    """\
    Return an Image for the page's og:image meta tag, if it has one.

    The ``content`` attribute of each <meta property="og:image"> element in
    the article's raw document is inspected in order; the first non-empty
    value becomes the image source. Size and byte data are filled in when
    self.getLocallyStoredImage returns a truthy result for that source.

    Returns None when no og:image tag provides a value.
    """
    metaTags = Parser.getElementsByTag(
        self.article.rawDoc, tag='meta', attr='property', value='og:image')

    # Lazy scan: stops at the first tag with a non-empty content value.
    contents = (Parser.getAttribute(tag, attr='content') for tag in metaTags)
    href = next((value for value in contents if value), None)
    if not href:
        return None

    result = Image()
    result.imageSrc = href
    result.imageExtractionType = "opengraph"
    result.confidenceScore = 100

    localCopy = self.getLocallyStoredImage(result.imageSrc)
    if localCopy:
        result.bytes = localCopy.bytes
        result.height = localCopy.height
        result.width = localCopy.width
    return result
def checkForLinkTag(self):
    """\
    Return an Image for the page's <link rel="image_src"> tag, if any.

    The ``href`` attribute of each <link rel="image_src"> element in the
    article's raw document is inspected in order; the first non-empty value
    becomes the image source. Size and byte data are filled in when
    self.getLocallyStoredImage returns a truthy result for that source.

    Returns None when no image_src link provides a value.
    """
    linkTags = Parser.getElementsByTag(
        self.article.rawDoc, tag='link', attr='rel', value='image_src')

    # Lazy scan: stops at the first link with a non-empty href.
    hrefs = (Parser.getAttribute(tag, attr='href') for tag in linkTags)
    href = next((value for value in hrefs if value), None)
    if not href:
        return None

    result = Image()
    result.imageSrc = href
    result.imageExtractionType = "linktag"
    result.confidenceScore = 100

    localCopy = self.getLocallyStoredImage(result.imageSrc)
    if localCopy:
        result.bytes = localCopy.bytes
        result.height = localCopy.height
        result.width = localCopy.width
    return result