Example #1
def reconfigure_duplicate_cluster(original, cluster_outside):
    # when a finding is deleted, and is the original of a duplicate cluster, we have to choose a new original for the cluster
    # only look for a new original if there is one outside this test
    if original is None or cluster_outside is None or len(
            cluster_outside) == 0:
        return

    if settings.DUPLICATE_CLUSTER_CASCADE_DELETE:
        cluster_outside.order_by('-id').delete()
    else:
        logger.debug('reconfigure_duplicate_cluster: cluster_outside: %s',
                     cluster_outside)
        # set new original to first finding in cluster (ordered by id)
        new_original = cluster_outside.order_by('id').first()
        if new_original:
            logger.debug('changing original of duplicate cluster %d to: %s:%s',
                         original.id, new_original.id, new_original.title)

            new_original.duplicate = False
            new_original.duplicate_finding = None
            new_original.active = True
            new_original.save_no_options()
            new_original.found_by.set(original.found_by.all())

        # if the cluster is size 1, there's only the new original left
        if new_original and len(cluster_outside) > 1:
            # for find in cluster_outside:
            #     if find != new_original:
            #         find.duplicate_finding = new_original
            #         find.save_no_options()

            mass_model_updater(Finding,
                               cluster_outside,
                               lambda f: set_new_original(f, new_original),
                               fields=['duplicate_finding'])
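The callback passed to mass_model_updater above, set_new_original, is not shown in this excerpt. Based on the commented-out loop it replaces, a minimal sketch of what it could look like (an assumption, not confirmed against the actual helper):

def set_new_original(finding, new_original):
    # Point every other cluster member at the newly chosen original;
    # mass_model_updater persists the 'duplicate_finding' field, so no save() here.
    if finding != new_original:
        finding.duplicate_finding = new_original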
Example #2
def npm_censor_hashes(apps, schema_editor):
    # We can't import the models directly as they may be newer versions than
    # this migration expects, so we use the historical versions instead.
    logger.info('Removing random hashes from npm audit file_paths')

    now = timezone.now()
    Finding = apps.get_model('dojo', 'Finding')
    Test_Type = apps.get_model('dojo', 'Test_Type')
    npm_audit, _ = Test_Type.objects.get_or_create(name='NPM Audit Scan')
    findings = Finding.objects.filter(test__test_type=npm_audit)

    mass_model_updater(Finding,
                       findings,
                       lambda f: censor_hashes(f),
                       fields=['file_path', 'hash_code'])
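censor_hashes is also not included in this excerpt. A minimal sketch of what such a per-finding callback could do, assuming the random hashes show up as long hex segments in file_path; the regex and replacement text below are illustrative, not the migration's actual logic:

import re

# Assumption: npm audit file_paths embed long random hex strings that break
# deduplication; strip them and clear hash_code so it can be recomputed.
HEX_SEGMENT = re.compile(r'[0-9a-f]{32,}')

def censor_hashes(finding):
    if finding.file_path:
        finding.file_path = HEX_SEGMENT.sub('<hash>', finding.file_path)
    finding.hash_code = None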
Example #3
    def handle(self, *args, **options):
        restrict_to_parsers = options['parser']
        hash_code_only = options['hash_code_only']
        dedupe_only = options['dedupe_only']
        dedupe_sync = options['dedupe_sync']

        if restrict_to_parsers is not None:
            findings = Finding.objects.filter(
                test__test_type__name__in=restrict_to_parsers)
            logger.info(
                "######## Will process only parsers %s and %d findings ########",
                restrict_to_parsers, findings.count())
        else:
            # add filter on id to make counts not slow on mysql
            findings = Finding.objects.all().filter(id__gt=0)
            logger.info(
                "######## Will process the full database with %d findings ########",
                findings.count())

        # Phase 1: update hash_codes without deduplicating
        if not dedupe_only:
            logger.info(
                "######## Start Updating Hashcodes (foreground) ########")

            # only prefetch here for hash_code calculation
            finds = findings.prefetch_related('endpoints', 'test__test_type')
            mass_model_updater(Finding,
                               finds,
                               lambda f: generate_hash_code(f),
                               fields=['hash_code'],
                               order='asc',
                               log_prefix='hash_code computation ')

            logger.info("######## Done Updating Hashcodes########")

        # Phase 2: deduplicate synchronously
        if not hash_code_only:
            if get_system_setting('enable_deduplication'):
                logger.info("######## Start deduplicating (%s) ########",
                            ('foreground' if dedupe_sync else 'background'))
                if dedupe_sync:
                    mass_model_updater(Finding,
                                       findings,
                                       lambda f: do_dedupe_finding(f),
                                       fields=None,
                                       order='desc',
                                       page_size=100,
                                       log_prefix='deduplicating ')
                else:
                    # async tasks only need the id
                    mass_model_updater(Finding,
                                       findings.only('id'),
                                       lambda f: do_dedupe_finding_task(f.id),
                                       fields=None,
                                       order='desc',
                                       log_prefix='deduplicating ')

                # update the grading (if enabled)
                logger.debug('Updating grades for products...')
                for product in Product.objects.all():
                    calculate_grade(product)

                logger.info("######## Done deduplicating (%s) ########",
                            ('foreground'
                             if dedupe_sync else 'tasks submitted to celery'))
            else:
                logger.debug(
                    "skipping dedupe because it's disabled in system settings")
Example #4
def reset_duplicates_before_delete(qs):
    mass_model_updater(Finding,
                       qs,
                       lambda f: reset_duplicate_before_delete(f),
                       fields=['duplicate', 'duplicate_finding'])
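reset_duplicate_before_delete is not shown either; judging from the fields persisted above, a minimal sketch of the per-finding callback (assumed):

def reset_duplicate_before_delete(finding):
    # Detach the finding from its duplicate cluster so the pending delete
    # does not leave other findings pointing at a removed original.
    finding.duplicate = False
    finding.duplicate_finding = None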