Пример #1
0
def main(options, arguments):
    """Run the single action selected by the parsed command-line options.

    Exactly one of the ``view``, ``check``, ``fingerprints`` or ``bump``
    options is honoured, tested in that order; when none is set, ``usage()``
    is called.

    Args:
        options: Parsed options object. The attributes read here are
            ``device``, ``usemmap``, ``view``, ``check``, ``fingerprints``
            and ``bump``. ``usemmap`` is rewritten to ``0`` when it is None.
        arguments: Remaining positional arguments; not used by this function.
    """
    # Pick the memory backend; Kmem is the default when no device is given.
    if options.device is None or options.device == '/dev/kmem':
        mmemory = Kmem()
    elif options.device == '/dev/mem':
        mmemory = Mem()
    else:
        usage()
        # Stop here even if usage() returns: there is no memory backend to
        # work with, and continuing would fail on the unbound ``mmemory``.
        return

    if options.usemmap is None:
        options.usemmap = 0

    if options.view is not None:
        # An unrecognised view name is silently ignored.
        if options.view == 'tasks':
            GVTasks(mmemory, options.usemmap).viewTasks()
        elif options.view == 'syscalls':
            GVSyscalls(mmemory, options.usemmap).viewSyscalls()
        elif options.view == 'networks':
            GVNetworks(mmemory, options.usemmap).viewNetworks()

    elif options.check is not None:
        # An unrecognised check name is silently ignored.
        if options.check == 'tasks':
            GVTasks(mmemory, options.usemmap).checkViewTasks()
        elif options.check == 'networks':
            GVNetworks(mmemory, options.usemmap).checkViewNetworks()

    elif options.fingerprints is not None:
        # options.fingerprints is indexed as (target, action).
        ffingerprints = Fingerprints(mmemory)
        target, action = options.fingerprints[0], options.fingerprints[1]
        if action == 'create':
            ffingerprints.doFingerprints(target)
        elif action == 'check':
            ffingerprints.checkFingerprints(target)

    elif options.bump is not None:
        # bump[0] is parsed as a base-16 number, bump[1] as an integer and
        # bump[2] is passed through unchanged (presumably address, size and
        # output file -- confirm against Mem/Kmem.dump).
        mmemory.open("r", options.usemmap)
        try:
            mmemory.dump(
                string.atol(options.bump[0], 16),
                int(options.bump[1]),
                options.bump[2],
            )
        finally:
            # Release the device even when the dump fails.
            mmemory.close()

    else:
        usage()
Пример #2
0
def compare(files, threshold):
    """Print the duplicates found among ``files``, working on temporary copies.

    The files are copied into a freshly created temporary directory (whose
    path is printed first), each copy is fingerprinted through
    ``utils.apply_async(calculate, ...)``, and the result of
    ``get_duplicates`` is printed for every copy.

    Args:
        files: Iterable of paths to copy and compare.
        threshold: Passed straight to ``Fingerprints``.
    """
    import glob
    import tempfile

    workdir = tempfile.mkdtemp()
    print(workdir)

    for original in files:
        shutil.copy2(original, workdir)

    # From here on only the copies inside the working directory are used.
    copies = glob.glob(os.path.join(workdir, '*'))

    index = Fingerprints(output_dir=workdir, threshold=threshold)
    # Published as a module global under the name 'fingerprints'
    # (presumably read by get_duplicates -- confirm).
    globals()['fingerprints'] = index

    jobs = [(workdir, copy_path) for copy_path in copies]
    for image_hash in utils.apply_async(calculate, jobs):
        index.add(image_hash)
    index.uncompress()

    for copy_path in copies:
        print(get_duplicates(index.find_hash(copy_path)))
Пример #3
0
def process(sources, dest, threshold, ignore, clean, normalize, cpus):
    """Fingerprint the files under ``sources`` and copy one file per group to ``dest``.

    Steps, in order:

    1. Collect every file under ``sources`` (``dest`` is added to ``ignore``).
    2. Optionally run ``utils.normalize_image`` over the files.
    3. Calculate and save fingerprints for files that have none yet.
    4. Compute duplicate information and cache it in
       ``<dest>/.duplicates.json``; the cache is reused only when its
       ``threshold`` matches and it already covers every file.
    5. For each group, copy the file chosen by ``get_best_match`` into
       ``dest``, and copy the non-exact matches plus the kept file into
       ``<dest>/duplicates/<n>`` for spot checking.

    Args:
        sources: Iterable of source paths to scan.
        dest: Output directory; also where fingerprints and the duplicates
            cache are stored.
        threshold: Similarity threshold handed to ``Fingerprints`` and stored
            in the duplicates cache.
        ignore: Iterable of paths to skip while scanning.
        clean: Passed through to ``utils.normalize_image``.
        normalize: When true, run the normalisation step.
        cpus: Passed to ``utils.apply_async`` as ``cpus``.

    Raises:
        SystemExit: When no files are found under ``sources``.
    """
    sources = utils.normalize_paths(list(sources))
    ignore = utils.normalize_paths(list(ignore) + [dest])

    fingerprints = Fingerprints(output_dir=dest, threshold=threshold, ignore=ignore)
    duplicates_file = os.path.join(dest, ".duplicates.json")

    print("Max distance of similarity: {0}".format(fingerprints.max_distance))

    # to get progress, count the total number of files we will touch
    files = []
    for source in sources:
        files += utils.walker(source, lambda x: x, ignore=ignore)

    print("Processing {0} files.".format(len(files)))

    if not files:
        print("No files found.")
        # quit() is a helper injected by the ``site`` module for interactive
        # use; raising SystemExit directly has the same effect.
        raise SystemExit()

    if normalize:
        print("Normalizing images...")
        files = utils.apply_async(utils.normalize_image, [
            (fingerprints._backup_dir, filepath, clean) for filepath in files
        ], cpus=cpus)

    need_fingerprint = [
        (fingerprints._backup_dir, filepath)
        for filepath in files if not fingerprints.find_hash(filepath)
    ]

    if need_fingerprint:
        def save_fingerprints(results):
            # Also used as the abort callback so partial work is kept.
            for image_hash in results:
                fingerprints.add(image_hash)
            fingerprints.save()
            print("Saved.")

        print("Calculating fingerprints for {0} files...".format(len(need_fingerprint)))
        results = utils.apply_async(calculate, need_fingerprint,
                                    aborted_callback=save_fingerprints, cpus=cpus)
        save_fingerprints(results)

    fingerprints.uncompress()

    output = {"threshold": threshold, "duplicates": {}}
    if os.path.exists(duplicates_file):
        with open(duplicates_file, 'r') as fp:
            loaded = json.load(fp)
        if loaded.get("threshold") == threshold:
            output = loaded

    # setdefault (not get) so the mapping we fill in is always the one stored
    # inside ``output``; otherwise a cache file without a "duplicates" key
    # would cause the results to be computed but never written back.
    output_duplicates = output.setdefault("duplicates", {})

    # we need to recalculate *all* duplicate information if there is a new file
    # added since the last time we ran, since it might be a duplicate of anything
    # TODO: handle ctrl-c again
    missing_duplicate_info = set(files) - set(output_duplicates)

    if missing_duplicate_info:
        need_duplicate = files
        output_duplicates.clear()
    else:
        # Every file already has an entry, so nothing is left to compute.
        need_duplicate = []

    if need_duplicate:
        print("Looking for duplicates for {0} files...".format(len(need_duplicate)))

        def save_duplicates(results):
            # Also used as the abort callback so partial work is kept.
            for image_hash, found in results:
                if not image_hash:
                    continue
                output_duplicates[image_hash.path] = found

            with open(duplicates_file, "w") as fp:
                json.dump(output, fp)

            print("Saved.")

        def setup(fingerprints):
            """
            hack: store the fingerprints into globals for each process so the worker
            function can pull them from globals()
            """
            globals()['fingerprints'] = fingerprints

        # For debugging, the same work can be done serially: call
        # setup(fingerprints) and then get_duplicates() on each hash.
        results = utils.apply_async(
            get_duplicates,
            [(fingerprints.find_hash(filepath),) for filepath in need_duplicate],
            pool_args=dict(
                initializer=setup,
                initargs=(fingerprints,)
            ),
            aborted_callback=save_duplicates,
            cpus=cpus
        )

        save_duplicates(results)

    keys = sorted(output_duplicates, key=get_file_date)

    # Paths already emitted, either as a group's key or as one of its
    # duplicates. A set keeps the membership test cheap for large collections.
    seen_files = set()
    count = 0
    duplicates_count = 0
    duplicates_dest = os.path.join(dest, "duplicates")
    for filepath in keys:
        if filepath in seen_files:
            continue

        duplicates = output_duplicates[filepath]
        count += 1
        seen_files.add(filepath)
        seen_files.update(duplicate_path for _, duplicate_path in duplicates)

        best_distance, best_image = get_best_match([(0, filepath)] + duplicates)
        new_filepath = get_new_image_path(best_image, count, dest)
        print("Copying {0} to {1}".format(best_image, new_filepath))
        shutil.copy2(best_image, new_filepath)

        if not duplicates:
            continue

        duplicates_count += 1
        filedest = os.path.join(duplicates_dest, str(duplicates_count))

        # The group's key is listed with the distance returned for the best
        # match; the remaining entries keep their stored distance.
        candidates = [(best_distance, filepath)] + duplicates
        for idx, (candidate_distance, candidate_path) in enumerate(candidates, start=1):
            # only copy non-exact matches for spot checking
            if candidate_distance > 0 and candidate_path != best_image:
                if not os.path.isdir(filedest):
                    os.makedirs(filedest)
                shutil.copy2(
                    candidate_path,
                    get_new_image_path(
                        candidate_path,
                        idx,
                        filedest,
                        # Yields e.g. "3_{0}{1}", a template that
                        # get_new_image_path presumably fills in itself.
                        "{0}_{1}".format(candidate_distance, "{0}{1}")
                    )
                )

        # The directory exists only if at least one near-match was copied;
        # in that case put the kept file next to them for comparison.
        if os.path.isdir(filedest):
            shutil.copy2(
                new_filepath, get_new_image_path(new_filepath, None, filedest, "kept{1}")
            )