def grab(path, image_url, file_name=None, timeout=30):
    """Download ``image_url`` and hand its content to ``write_file``.

    The response's Content-Type must be listed in
    ``ImageLoader.ALLOWED_TYPES``; its subtype (the part after ``/``) is
    appended to the file name as the extension.

    Args:
        path: Directory component the target file path is joined onto.
        image_url: URL fetched with ``requests.get``.
        file_name: Base name of the target file (the extension is added
            here). When falsy, the last path segment of ``image_url`` is
            used instead.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            server cannot block the caller indefinitely.

    Returns:
        True when the content was passed to ``write_file``; False when the
        request failed or the content type is missing or not allowed.
    """
    if not file_name:
        # rstrip so a URL ending in "/" does not yield an empty base name.
        file_name = image_url.rstrip('/').split('/')[-1]

    # Only the network call can raise RequestException; keep the try narrow.
    try:
        response = requests.get(image_url, timeout=timeout)
    except exceptions.RequestException as error:
        Logger.record_log('Occurred Exception:', error)
        return False

    # The header may be absent or carry parameters such as
    # "image/jpeg; charset=binary"; compare only the bare media type.
    content_type = response.headers.get('content-type', '')
    content_type = content_type.split(';')[0].strip()
    if not content_type or content_type not in ImageLoader.ALLOWED_TYPES:
        return False

    extension = content_type.split('/')[-1]
    file_path = os.path.join(path, '%s.%s' % (file_name, extension))
    write_file(file_path, response.content)
    return True
def search_and_grab_candy(self, urls=None):
    """Crawl each board URL, collect its pin image URLs and grab them.

    For every URL in ``urls`` (nothing is done when it is None or empty):

    1. Load the page with ``self.driver`` and read the ``.board-name``
       text; ``'<self.resource_path>/<board name>'`` is passed to
       ``create_image_dir``.
    2. Collect the ``href`` of every ``.pin a.layer-view`` anchor, then
       reload the URL with ``max=<data-seq of the last pin>&limit=20&wfl=1``
       and repeat until a page yields no such anchors.
    3. Visit each collected link and read the ``src`` of its
       ``.zoom-layer img`` element.
    4. Pass every image URL to ``ImageLoader.grab`` together with the
       value returned by ``create_image_dir``.

    Progress is reported through ``Logger.record_log`` and flushed with
    ``Logger.write_log_file`` after each page request and each grab.
    """
    pin_anchor_script = 'return document.querySelectorAll(".pin a.layer-view");'

    for url in ([] if urls is None else urls):
        self.driver.get(url)
        board_title = self.driver.find_element_by_css_selector(
            '.board-name').text
        image_dir = create_image_dir(
            '%s/%s' % (self.resource_path, board_title))

        # Phase 1: page through the board, gathering the pin detail links.
        pin_links = []
        anchors = self.driver.execute_script(pin_anchor_script)
        while len(anchors) > 0:
            pin_links.extend(
                anchor.get_attribute('href') for anchor in anchors)
            # The last pin's data-seq is the cursor for the next page.
            last_pin = self.driver.find_element_by_css_selector(
                '.pin[data-seq]:last-child')
            last_seq = str(last_pin.get_attribute('data-seq'))
            query = 'max=%s&limit=20&wfl=1' % last_seq
            self.driver.get('%s?%s' % (url, query))
            anchors = self.driver.execute_script(pin_anchor_script)
            Logger.record_log('Request root url page %s?%s' % (url, query))
            Logger.write_log_file()
        Logger.record_log('Find suburls length: %s' % len(pin_links))

        # Phase 2: open every pin page and read the full-size image URL.
        image_links = []
        for pin_link in pin_links:
            self.driver.get(pin_link)
            zoomed_image = self.driver.find_element_by_css_selector(
                '.zoom-layer img')
            image_links.append(zoomed_image.get_attribute('src'))
            Logger.record_log(
                'Request sub url page %s' % zoomed_image.get_attribute('src'))
            Logger.write_log_file()
        Logger.record_log('Find imgurls length: %s' % len(image_links))

        # Phase 3: download each image into the board's directory.
        for image_link in image_links:
            Logger.record_log('Grab: %s' % image_link)
            ImageLoader.grab(image_dir, image_link)
            Logger.write_log_file()