def downloadImgur(href, path=''):
    '''Detects the type of url and does the appropriate download.

    The checks are applied in this order:
      1. 'gallery/' in href  -> downloadImgurPage(href, path)
      2. '/r/' in href       -> downloadImgurGallery(href, path)
      3. a '.' four characters from the end (ex .jpg, .png)
                             -> downloadResource(href, destPath=path)
      4. anything else       -> every link matched by
                                'div.image.textbox > a' on the page is
                                passed to downloadResource.

    href -- the imgur url to inspect
    path -- forwarded to the download helpers as the destination
    '''
    if 'gallery/' in href:
        downloadImgurPage(href, path)
    elif '/r/' in href:
        downloadImgurGallery(href, path)
    elif href[-4:-3] == '.':
        # possibly a pic ex .jpg, .png
        # A slice is used instead of href[-4] so that an href shorter than
        # four characters falls through to the page lookup below rather
        # than raising IndexError.
        downloadResource(href, destPath=path)
    else:
        imgBox = getElementsFromUrl(href, 'div.image.textbox > a')
        for e in imgBox:
            src = e.get('href')
            if not src:
                # No href on this element, so there is no url to download.
                continue
            downloadResource(src, destPath=path)
for i, postUrl in enumerate(postUrls[:limit], 1): print i, '/', limit, postUrl #Find pictures directly on post newSrcs = cleanImgSrcs(getImgSrcs(postUrl)) print '\tFound :', len(newSrcs) imageSrcs += newSrcs #Find pictures in post iframe elems = getElementsFromUrl(postUrl, 'iframe.photoset') iframeUrls = [e.get('src') for e in elems] for iframeUrl in iframeUrls: print '\tiframe:', iframeUrl iframeImageSrcs = cleanImgSrcs(getImgSrcs(iframeUrl)) print '\tFound :', len(iframeImageSrcs) imageSrcs += iframeImageSrcs return imageSrcs if __name__ == '__main__': print 'Getting image srcs...' srcs = getSearchImgs('cat') print 'Result:' print '\n'.join(srcs) print 'Downloading images...' for i, src in enumerate(srcs, 1): print i, '/', len(srcs) downloadResource(src, destPath='tumblr')
def downloadImgurPage(href, path=''):
    '''Download every image found on an imgur page or album.

    The image sources are obtained from getImgurImageSrcs(href) and each
    one is handed to downloadResource with destPath=path.

    href -- url of the imgur page or album
    path -- forwarded to downloadResource as destPath
    '''
    for imageSrc in getImgurImageSrcs(href):
        downloadResource(imageSrc, destPath=path)