Exemplo n.º 1
0
 def get_or_create_from_input_file(file_path,
                                   thumb_folder=THUMB_FOLDER,
                                   get_page_search=False,
                                   use_cache=True):
     """Return the SearchFile record for ``file_path``, creating it if needed.

     The record is looked up by the SHA-256 checksum of the file and the
     session is committed right after the lookup.

     Args:
         file_path: Path of the input file.
         thumb_folder: Folder passed to the model's thumbnail helpers.
         get_page_search: When true, also cache the page search result if it
             is missing (or if ``use_cache`` is false).
         use_cache: When true, an already existing record is returned as-is.

     Returns:
         Tuple ``(model, False)`` when an existing record is reused because
         ``use_cache`` is true, otherwise ``(model, True)``.
     """
     file_hash = sha256.sha256_checksum(file_path)
     model, is_new = get_or_create(db.session, SearchFile, checksum=file_hash)
     db.session.commit()  # pylint: disable=no-member
     if use_cache and not is_new:
         return model, False

     image_model, _ = ImageFile.get_or_create_from_file(file_path=file_path)
     model.image = image_model
     thumbnail_present = model.is_thumbnail_exist(thumb_folder)
     if not thumbnail_present:
         model.create_thumbnail(file_path, thumb_folder)

     if get_page_search:
         search_attrs = ('image_guess', 'search_url', 'similar_search_url',
                         'size_search_url')
         # Read every attribute up front, then decide whether a refresh is due.
         cached_values = [getattr(model, attr) for attr in search_attrs]
         if not (any(cached_values) and use_cache):
             model.cache_page_search_result(file_path)
     return model, True
Exemplo n.º 2
0
def get_or_create_image_file(file_path, disable_cache=False):
    """Get or create the ImageFile record matching ``file_path``.

    The record is looked up by the SHA-256 checksum of the file. When the
    record was just created, or when ``disable_cache`` is true, its width,
    height, image format and byte size are (re)read from the file.

    Args:
        file_path: Path of the image file.
        disable_cache: Refresh the image attributes even for an existing record.

    Returns:
        Tuple ``(model, created)`` as returned by ``get_or_create``.
    """
    checksum = sha256_checksum(file_path)
    model, created = gid.models.get_or_create(gid.models.db.session,
                                              gid.models.ImageFile,
                                              checksum=checksum)
    if created or disable_cache:
        # Use the image as a context manager so the underlying file handle
        # is released as soon as the metadata has been read.
        with Image.open(file_path) as img:
            width, height = img.size[0], img.size[1]
            img_format = img.format
        size = os.path.getsize(file_path)
        model.width = width
        model.height = height
        model.img_format = img_format
        model.size = size
    return model, created
Exemplo n.º 3
0
 def get_or_create_from_file(file_path):
     """Get or create the ImageFile record matching ``file_path``.

     The record is looked up by the SHA-256 checksum of the file. Only a newly
     created record gets its width, height, image format and byte size filled
     in from the file; an existing record is returned untouched.

     Args:
         file_path: Path of the image file.

     Returns:
         Tuple ``(model, created)`` as returned by ``get_or_create``.
     """
     checksum = sha256.sha256_checksum(file_path)
     model, created = get_or_create(db.session,
                                    ImageFile,
                                    checksum=checksum)
     if not created:
         return model, created
     # Use the image as a context manager so the underlying file handle is
     # released as soon as the metadata has been read.
     with Image.open(file_path) as img:
         width, height = img.size[0], img.size[1]
         img_format = img.format
     size = os.path.getsize(file_path)
     model.width = width
     model.height = height
     model.img_format = img_format
     model.size = size
     return (model, created)
Exemplo n.º 4
0
 def create_thumbnail(self, file_path, thumb_folder=THUMB_FOLDER):
     """Create a JPEG thumbnail for ``file_path`` and record its checksum.

     The image is shrunk with ``thumbnail((256, 256))``, saved as JPEG to a
     temporary file, and copied into ``thumb_folder`` under the name
     ``<sha256 of the thumbnail>.jpg`` unless that file already exists.
     ``self.thumbnail_checksum`` is then set to the checksum of the ImageFile
     record obtained for the stored thumbnail.

     Args:
         file_path: Path of the source image.
         thumb_folder: Folder where the thumbnail file is stored.
     """
     # NOTE(review): the temporary file is re-opened by name while it is still
     # open here; presumably this only works on POSIX platforms - confirm.
     with tempfile.NamedTemporaryFile() as temp:
         # Context manager closes the source image's file handle once the
         # thumbnail has been written.
         with Image.open(file_path) as img:
             img.thumbnail((256, 256))
             try:
                 img.save(temp.name, 'JPEG')
             except OSError as err:
                 # Saving failed in the current mode; retry after converting
                 # the image to RGB.
                 log.warning('Error create thumbnail, convert to jpg first',
                             error=err)
                 img.convert('RGB').save(temp.name, 'JPEG')
         thumb_checksum = sha256.sha256_checksum(temp.name)
         thumbnail_path = os.path.join(thumb_folder,
                                       thumb_checksum + '.jpg')
         if not os.path.isfile(thumbnail_path):
             shutil.copyfile(temp.name, thumbnail_path)
         with db.session.no_autoflush:  # pylint: disable=no-member
             thumb_m, _ = ImageFile.get_or_create_from_file(
                 file_path=thumbnail_path)
             self.thumbnail_checksum = thumb_m.checksum
Exemplo n.º 5
0
def check_thumbnails():
    """Rename thumbnails to their checksum and register them in the database.

    Every regular file in the user-data ``thumb`` folder is hashed; a file
    whose name (without extension) differs from its SHA-256 checksum is moved
    to ``<checksum>.jpg``. Afterwards the app is configured against the
    ``gid_debug.db`` SQLite database and an image-file model is obtained for
    each thumbnail, added to the session and committed.
    """
    thumb_folder = os.path.join(
        user_data_dir('google_images_download', 'hardikvasa'), 'thumb')  # NOQA

    # Collect the regular files first so the renames below work on a snapshot.
    entries = []
    for name in os.listdir(thumb_folder):
        full_path = os.path.join(thumb_folder, name)
        if os.path.isfile(full_path):
            entries.append({'basename': name, 'path': full_path})

    thumbnails = []
    for entry in entries:
        current_stem = os.path.splitext(entry['basename'])[0]
        digest = sha256.sha256_checksum(
            os.path.join(thumb_folder, entry['basename']))
        target_name = digest + '.jpg'
        target_path = os.path.join(thumb_folder, target_name)
        if digest != current_stem:
            # File name does not match its content hash: move it.
            shutil.move(entry['path'], target_path)
            log.info('Move thumbnail', src=current_stem, dst=digest)
        thumbnails.append(
            {'basename': target_name, 'path': target_path, 'checksum': digest})

    settings = (
        ('DEBUG', True),
        ('LOGGER_HANDLER_POLICY', 'debug'),
        ('SECRET_KEY', os.getenv('DDG_SERVER_SECRET_KEY') or os.urandom(24)),
        ('SQLALCHEMY_DATABASE_URI', 'sqlite:///gid_debug.db'),
        ('SQLALCHEMY_TRACK_MODIFICATIONS', False),
        ('WTF_CSRF_ENABLED', False),
    )
    for key, value in settings:
        app.config[key] = value
    models.db.init_app(app)
    app.app_context().push()
    models.db.create_all()

    model_results = [
        api.get_or_create_image_file_with_thumbnail(thumb['path'])
        for thumb in thumbnails
    ]
    models.db.session.add_all([result[0] for result in model_results])
    models.db.session.commit()
    def run(self, item, filename, filename_format='basename', no_clobber=True):
        """Download ``item`` to ``filename`` and optionally rename it.

        If ``filename_format`` is ``'sha256'``, the downloaded file is renamed
        after its SHA-256 checksum once the download has finished.
        If a file with the new name already exists and ``no_clobber`` is
        ``True``, the downloaded file is sent to the trash and the download is
        reported as skipped.
        If a file with the new name already exists and ``no_clobber`` is
        ``False``, the downloaded file replaces the existing one and the
        download is reported as completed.
        ``self.dl_counter`` is incremented in both cases; ``self.error_count``
        is incremented when the download raises an I/O, HTTP or URL error.

        Args:
            item: Url to be downloaded.
            filename: Filename of the url.
            filename_format: Filename format of the url (``'basename'`` keeps
                ``filename``; ``'sha256'`` renames by checksum).
            no_clobber: Keep an already existing renamed file instead of
                replacing it.
        """
        def _fetch(context=None):
            """Write the response body for ``req`` into ``filename``."""
            with urlopen(req, context=context) as response, \
                    open(filename, 'wb') as output_file:
                output_file.write(response.read())

        try:
            req = Request(item, headers={"User-Agent": self.ua.firefox})
            try:
                _fetch()
            except ssl.CertificateError as e:
                # NOTE(review): the keyword argument only works if `logging`
                # is a structured logger; the stdlib module would raise
                # TypeError here - confirm what `logging` is bound to.
                logging.debug('Error raised, create unverified context',
                              e=str(e))
                # SECURITY: the retry disables certificate verification.
                _fetch(context=ssl._create_unverified_context())

            # assume file does not exist when another filename_format is chosen
            file_already_exist = False
            new_filename = None
            if filename_format == 'sha256':
                new_basename = sha256_checksum(
                    filename=filename)  # without extension
                new_filename = rename_basename(old_filename=filename,
                                               new_basename=new_basename)

                new_filename_exist = os.path.isfile(new_filename)
                if new_filename_exist:
                    logging.debug('Exist: {}'.format(new_filename))

                if new_filename_exist and no_clobber:
                    file_already_exist = True
                    send2trash(filename)  # remove downloaded file
                else:
                    # this will rename or move based on the condition.
                    shutil.move(filename, new_filename)
            elif filename_format != 'basename':
                # 'basename' is the documented default, not an unknown format.
                logging.debug(
                    'Unknown filename format: {}'.format(filename_format))

            if file_already_exist:
                print('Skipped\t\t====> {}'.format(new_filename))
            else:
                print("completed\t====> {}".format(filename))
            self.dl_counter += 1

        # HTTPError is a subclass of URLError, which is a subclass of OSError
        # (IOError), so the handlers must go from most to least specific.
        except HTTPError:
            self.error_count += 1
            print("HTTPError {}".format(filename))

        except URLError:
            self.error_count += 1
            print("URLError {}".format(filename))

        except IOError:  # any other I/O failure (e.g. writing the file)
            self.error_count += 1
            print("IOError on image {}".format(filename))