예제 #1
0
파일: cli.py 프로젝트: phretor/python-sdk
def get_analysis(sha256_or_file, upload, save, outfile):
    """
    Get the Koodous report of SHA256_OR_FILE. If the file has not been
    analyzed by Koodous, the file is just submitted (or not, according to
    the --upload option).
    """
    # Parameters (NOTE(review): documented as comments in case the docstring
    # above is rendered as command help text -- confirm):
    #   sha256_or_file -- path of a readable local file, whose digest is
    #       computed with file_hash, or any other string, which is passed to
    #       the API unchanged as the digest.
    #   upload -- when truthy, a local file with no analysis is uploaded.
    #   save -- when truthy, a non-empty analysis is also written as JSON.
    #   outfile -- open file object to write to (its .name is used for
    #       logging); when falsy, <wdir>/<sha256>.json is created instead.
    ctx = click.get_current_context()
    api = ctx.meta.get('api')
    wdir = ctx.meta.get('wdir')

    # Only an existing, readable path counts as a file; anything else is
    # treated as a digest.
    is_file = os.path.isfile(sha256_or_file) and os.access(sha256_or_file,
                                                           os.R_OK)
    sha256 = sha256_or_file
    if is_file:
        sha256 = file_hash(sha256_or_file)
        logger.info('File %s SHA-256 digest = %s', sha256_or_file, sha256)

    logger.info('Getting analysis of %s', sha256)

    analysis = api.get_analysis(sha256)

    # Raw echo of whatever the API returned; this runs even when the result
    # is empty.
    click.echo(analysis)

    if analysis:
        click.echo(pygmentize_json(analysis))

        if save:
            if outfile:
                # Caller-supplied handle: the caller owns it, so it is not
                # closed here.
                # NOTE(review): on Python 3 json.dump needs a text-mode
                # handle -- confirm how the caller opens it.
                filepath = outfile.name
                logger.info('Saving analysis to %s', filepath)
                json.dump(analysis, outfile)
            else:
                filepath = os.path.join(wdir, '{}.json'.format(sha256))
                # Serialize first and write bytes: the file is opened in
                # binary mode, and json.dump would hand it text on Python 3.
                payload = json.dumps(analysis).encode('utf-8')
                # The handle is opened here, so it is also closed here, even
                # if the write fails.
                with io.open(filepath, 'wb') as own_file:
                    logger.info('Saving analysis to %s', filepath)
                    own_file.write(payload)

            logger.info('Saved to %s successfully', filepath)
    elif is_file:
        logger.warning('File not found on Koodous')

        if upload:
            logger.info('Uploading file for analysis')

            # Best effort: an upload failure is logged, not propagated.
            try:
                upload_result = api.upload(sha256_or_file)
                logger.info('File %s uploaded successfully', upload_result)
            except Exception as ex:
                logger.error('Uploading %s failed: %s', sha256_or_file, ex)
예제 #2
0
def get_analysis(sha256_or_file, upload, save, outfile):
    """
    Get the Koodous report of SHA256_OR_FILE. If the file has not been
    analyzed by Koodous, the file is just submitted (or not, according to
    the --upload option).
    """
    # Parameters (NOTE(review): documented as comments in case the docstring
    # above is rendered as command help text -- confirm):
    #   sha256_or_file -- path of a readable local file, whose digest is
    #       computed with file_hash, or any other string, which is passed to
    #       the API unchanged as the digest.
    #   upload -- when truthy, a local file with no analysis is uploaded.
    #   save -- when truthy, a non-empty analysis is also written as JSON.
    #   outfile -- open file object to write to (its .name is used for
    #       logging); when falsy, <wdir>/<sha256>.json is created instead.
    ctx = click.get_current_context()
    api = ctx.meta.get('api')
    wdir = ctx.meta.get('wdir')

    # Only an existing, readable path counts as a file; anything else is
    # treated as a digest.
    is_file = os.path.isfile(sha256_or_file) and os.access(
        sha256_or_file, os.R_OK)
    sha256 = sha256_or_file
    if is_file:
        sha256 = file_hash(sha256_or_file)
        logger.info('File %s SHA-256 digest = %s', sha256_or_file, sha256)

    logger.info('Getting analysis of %s', sha256)

    analysis = api.get_analysis(sha256)

    # Raw echo of whatever the API returned; this runs even when the result
    # is empty.
    click.echo(analysis)

    if analysis:
        click.echo(pygmentize_json(analysis))

        if save:
            if outfile:
                # Caller-supplied handle: the caller owns it, so it is not
                # closed here.
                # NOTE(review): on Python 3 json.dump needs a text-mode
                # handle -- confirm how the caller opens it.
                filepath = outfile.name
                logger.info('Saving analysis to %s', filepath)
                json.dump(analysis, outfile)
            else:
                filepath = os.path.join(wdir, '{}.json'.format(sha256))
                # Serialize first and write bytes: the file is opened in
                # binary mode, and json.dump would hand it text on Python 3.
                payload = json.dumps(analysis).encode('utf-8')
                # The handle is opened here, so it is also closed here, even
                # if the write fails.
                with io.open(filepath, 'wb') as own_file:
                    logger.info('Saving analysis to %s', filepath)
                    own_file.write(payload)

            logger.info('Saved to %s successfully', filepath)
    elif is_file:
        logger.warning('File not found on Koodous')

        if upload:
            logger.info('Uploading file for analysis')

            # Best effort: an upload failure is logged, not propagated.
            try:
                upload_result = api.upload(sha256_or_file)
                logger.info('File %s uploaded successfully', upload_result)
            except Exception as ex:
                logger.error('Uploading %s failed: %s', sha256_or_file, ex)
예제 #3
0
 def test_file_hash(self):
     """Check the SHA-1 that utils.file_hash reports for ``__init__.py``.

     The file name is relative, so it is resolved against the current
     working directory. The file is hashed with a 1 MiB block size.
     """
     # This value is the SHA-1 digest of zero bytes of input, so the test
     # presumably relies on ``__init__.py`` being empty -- TODO confirm.
     expected_digest = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
     one_mebibyte = 1024 * 1024
     actual_digest = utils.file_hash(
         '__init__.py', one_mebibyte, hash_type='sha1')
     self.assertEqual(actual_digest, expected_digest)
예제 #4
0
def _main():
    """Detect duplicate media files and remove them (or only report them).

    Reads its configuration from the module-level ``args`` object:
    ``media_dirs`` (directories to scan), ``log_file`` (log path; when it is
    None the log goes to ``image_organization.log`` inside the first media
    directory), ``recursive`` (forwarded to ``list_subtree``) and ``dry_run``
    (log what would be removed without deleting anything).

    Files are grouped by ``file_hash``. A file is treated as a duplicate only
    if ``filecmp.cmp`` also reports it equal to a previously seen file with
    the same hash. The first file seen with given content is kept.

    Raises:
        ValueError: if any entry of ``args.media_dirs`` does not exist.
    """
    media_dirs = args.media_dirs
    for media_dir in media_dirs:
        if not os.path.exists(media_dir):
            raise ValueError(f"Media directory '{media_dir}' does not exist")

    log_file = args.log_file if args.log_file is not None else os.path.join(
        media_dirs[0], "image_organization.log")
    logger = create_logger(log_file, "duplicate_detector")

    logger.info("started new remove-duplicate session")

    print("Listing subtree...")
    all_files = []
    for media_dir in media_dirs:
        all_files.extend(list_subtree(media_dir, recursive=args.recursive))

    media_files = []
    for f in tqdm(all_files, desc="Filtering non-media files"):
        try:
            if is_media(f):
                media_files.append(f)
        except OSError:
            logger.warning(f"OS error while checking if '{f}' is a media file")

    # Maps a hash to the kept files that produced it.
    hash_dict = dict()

    n_duplicates_found = n_duplicates_removed = 0
    for file_path in tqdm(media_files, desc="Removing duplicates"):
        try:
            h = file_hash(file_path)
        except PermissionError:
            logger.error(
                f"Permission error while computing the hash of '{file_path}'")
            continue

        candidates = hash_dict.setdefault(h, [])
        dup_file = None
        already_tracked = False
        for candidate_file in candidates:
            # The same file can be listed more than once (overlapping or
            # repeated media directories, or a symlink). Comparing it with
            # itself would flag it as a duplicate and delete the only copy,
            # so such entries are skipped instead.
            if os.path.samefile(file_path, candidate_file):
                already_tracked = True
                break
            # An equal hash is not trusted alone; filecmp confirms the match.
            if filecmp.cmp(file_path, candidate_file):
                dup_file = candidate_file
                break

        if already_tracked:
            continue

        if dup_file is None:
            # New content for this hash: remember it for later comparisons.
            candidates.append(file_path)
            continue

        n_duplicates_found += 1
        if args.dry_run:
            logger.info(
                f"Would remove {file_path} - duplication of {dup_file}"
            )
        else:
            logger.info(
                f"Remove {file_path} - duplication of {dup_file}")
            os.remove(file_path)
            n_duplicates_removed += 1

    print(
        f"Done - removed {n_duplicates_removed}/{n_duplicates_found} duplicates"
    )