def download_image(self, response):
    """Derive the on-disk location for a fetched image and prepare its directory.

    Reads the ``image_info`` dict from ``response.meta`` and fills it in place:

    * ``mime``      -- the response's ``Content-Type`` header value.
    * ``file_name`` -- last URL path segment; when the mime type is not
      ``image/jpeg`` the extension is replaced by the mime subtype.
    * ``file_path`` -- ``<file_name[0:2]>/<file_name[16:18]>/<file_name>``,
      i.e. the path relative to ``<data_path>/images``.

    If the target file already exists a warning is logged and the method
    returns early, leaving ``file_name``/``file_path`` unset.

    Raises:
        KeyError: if ``image_info`` is missing from ``response.meta``.
        OSError: propagated unchanged from ``mkdir``.

    NOTE(review): nothing in this method writes the image data to
    ``file_path`` -- confirm whether that happens elsewhere.
    """
    image_info = response.meta['image_info']

    # mime
    mime = response.headers['Content-Type']
    if not isinstance(mime, str):
        # Scrapy on Python 3 returns header values as bytes; the comparisons
        # and split() below need text. On Python 2 a plain str is untouched.
        mime = mime.decode('latin-1')
    image_info['mime'] = mime

    # file_name
    file_name = response.url.split('/')[-1]
    if mime != 'image/jpeg':
        # Keep the part before the first dot, take the extension from the mime subtype.
        file_name = '{0}.{1}'.format(file_name.split('.')[0], mime.split('/')[-1])

    # Two-level directory taken from fixed character positions of the name.
    # presumably the names are long hash-like strings -- TODO confirm.
    path_part = os.path.join(file_name[0:2], file_name[16:18])
    image_dir = os.path.join(self.data_path, 'images', path_part)
    file_path = os.path.join(image_dir, file_name)

    # check file if exist
    if os.path.isfile(file_path):
        self.logger.warning("download_image() file exist. image_info: %r" , image_info)
        return

    image_info['file_name'] = file_name
    image_info['file_path'] = os.path.join(path_part, file_name)

    # The old ``except OSError, err: raise`` handler only re-raised, so calling
    # mkdir directly behaves the same and is valid on Python 2 and 3.
    mkdir(image_dir)
def start_requests(self):
    """Ask for confirmation, cache crawl settings on the spider, and prepare the data dir.

    If ``getYN`` returns a falsy answer the method returns immediately and
    sets nothing. Otherwise it copies these settings onto the instance:

    * ``data_path``   -- ``DATA_PATH`` joined onto ``'.'``
    * ``ignore_tags`` -- ``IGNORE_TAGS``
    * ``rerun``       -- ``RERUN``
    * ``follow_link`` -- ``FOLLOW_LINK``
    * ``flushRedis``  -- ``FLUSH_REDIS``

    and then calls ``mkdir`` on ``data_path``.

    Raises:
        KeyError: if a required key is missing from ``self.settings``
            (assuming it raises like a mapping -- TODO confirm).
        OSError: propagated unchanged from ``mkdir``.

    NOTE(review): as visible here the method produces no requests; confirm
    whether request generation is expected to follow the setup.
    """
    if not getYN("ready for run?(y/n): "):
        return

    # create dir
    self.data_path = os.path.join('.', self.settings["DATA_PATH"])
    self.ignore_tags = self.settings['IGNORE_TAGS']
    self.rerun = self.settings["RERUN"]
    self.follow_link = self.settings["FOLLOW_LINK"]
    self.flushRedis = self.settings["FLUSH_REDIS"]

    # The old ``except OSError, err: raise`` handler only re-raised, so calling
    # mkdir directly behaves the same and is valid on Python 2 and 3.
    mkdir(self.data_path)