Пример #1
0
    def download_image(self, response):
        """Work out where the image in *response* is stored and prepare its directory.

        Reads the ``image_info`` dict from ``response.meta`` and records the
        response's ``Content-Type`` in it under ``mime``.  Unless the target
        file already exists, it also records ``file_name`` and ``file_path``
        (the path relative to the ``images`` directory) and creates the
        target directory via ``mkdir``.

        Returns:
            None.  When the target file already exists a warning is logged
            and the method returns before touching ``file_name``/``file_path``.

        Raises:
            OSError: whatever ``mkdir`` raises is propagated unchanged.
        """
        image_info = response.meta['image_info']

        # mime
        mime = response.headers['Content-Type']
        # Header values arrive as bytes under Python 3; decode them so the
        # string comparison and split below work.  On Python 2 ``bytes`` is
        # ``str`` and this branch is never taken.
        if isinstance(mime, bytes) and not isinstance(mime, str):
            mime = mime.decode('latin-1')
        image_info['mime'] = mime

        # file_name: last URL segment; for non-JPEG responses the extension
        # is replaced with the MIME subtype.
        file_name = response.url.split('/')[-1]
        if mime != 'image/jpeg':
            file_name = '{0}.{1}'.format(file_name.split('.')[0], mime.split('/')[-1])

        # Two-level sub-directory taken from characters 0-1 and 16-17 of the
        # file name.
        path_part = os.path.join(file_name[0:2], file_name[16:18])
        image_dir = os.path.join(self.data_path, 'images', path_part)
        file_path = os.path.join(image_dir, file_name)

        # check whether the file already exists
        if os.path.isfile(file_path):
            self.logger.warning("download_image() file exist. image_info: %r" , image_info)
            return

        image_info['file_name'] = file_name
        image_info['file_path'] = os.path.join(path_part, file_name)

        # The original wrapped this call in a try/except that only re-raised
        # (using Python-2-only ``except OSError, err`` syntax); letting the
        # OSError propagate directly is equivalent.
        mkdir(image_dir)
Пример #2
0
    def start_requests(self):
        """Ask for confirmation, load run options from settings and create the data dir.

        Prompts via ``getYN`` and returns immediately if the answer is
        negative.  Otherwise copies ``DATA_PATH`` (joined onto ``'.'``),
        ``IGNORE_TAGS``, ``RERUN``, ``FOLLOW_LINK`` and ``FLUSH_REDIS`` from
        ``self.settings`` onto the instance and creates the data directory
        via ``mkdir``.

        Raises:
            OSError: whatever ``mkdir`` raises is propagated unchanged.
        """
        if not getYN("ready for run?(y/n): "):
            return

        # Run options, read once from the settings.
        self.data_path = os.path.join('.', self.settings["DATA_PATH"])
        self.ignore_tags = self.settings['IGNORE_TAGS']

        self.rerun = self.settings["RERUN"]
        self.follow_link = self.settings["FOLLOW_LINK"]
        self.flushRedis = self.settings["FLUSH_REDIS"]

        # create dir
        # The original wrapped this call in a try/except that only re-raised
        # (using Python-2-only ``except OSError, err`` syntax); letting the
        # OSError propagate directly is equivalent.
        mkdir(self.data_path)