def formfield_for_choice_field(self, db_field, request, **kwargs):
    """Limit the ``scanner`` choice field to the CUSTOMS and YARA scanners.

    Any other choice field is handed to the parent implementation untouched.
    """
    if db_field.name == "scanner":
        # The blank option comes first, followed by the selectable scanners
        # in the iteration order of SCANNERS.
        blank_choice = (("", "---------"), )
        selectable = tuple(
            (key, value)
            for key, value in SCANNERS.items()
            if key in [CUSTOMS, YARA]
        )
        kwargs['choices'] = blank_choice + selectable
    return super().formfield_for_choice_field(db_field, request, **kwargs)
def run_scanner(results, upload_pk, scanner, api_url, api_key):
    """
    Send a FileUpload to a remote scanner over HTTP and store its answer.

    - `results` are the validation results handed along the validation
      chain. This task is part of that chain, so they must be the first
      argument, and they are returned unchanged.
    - `upload_pk` is the FileUpload ID.

    Errors raised once the upload has been loaded are logged and counted,
    but never re-raised.
    """
    scanner_name = SCANNERS.get(scanner)
    log.info('Starting scanner "%s" task for FileUpload %s.', scanner_name,
             upload_pk)

    is_webextension = results['metadata']['is_webextension']
    if not is_webextension:
        log.info(
            'Not running scanner "%s" for FileUpload %s, it is not a '
            'webextension.', scanner_name, upload_pk)
        return results

    upload = FileUpload.objects.get(pk=upload_pk)

    try:
        if not os.path.exists(upload.path):
            raise ValueError('File "{}" does not exist.'.format(upload.path))

        scanner_result = ScannerResult(upload=upload, scanner=scanner)

        # Only the remote call is timed.
        with statsd.timer('devhub.{}'.format(scanner_name)):
            json_payload = {
                'api_key': api_key,
                'download_url': upload.get_authenticated_download_url(),
            }
            response = requests.post(
                url=api_url,
                json=json_payload,
                timeout=settings.SCANNER_TIMEOUT,
            )

        try:
            data = response.json()
        except ValueError:
            # Surface the raw body so undecodable responses end up in the
            # exception log below.
            raise ValueError(response.text)

        request_failed = response.status_code != 200 or 'error' in data
        if request_failed:
            raise ValueError(data)

        scanner_result.results = data
        scanner_result.save()

        statsd.incr('devhub.{}.success'.format(scanner_name))
        log.info('Ending scanner "%s" task for FileUpload %s.', scanner_name,
                 upload_pk)
    except Exception:
        statsd.incr('devhub.{}.failure'.format(scanner_name))
        # The exception is logged but deliberately not raised, so that the
        # submission flow is not perturbed.
        log.exception('Error in scanner "%s" task for FileUpload %s.',
                      scanner_name, upload_pk)

    return results
class ScannerResult(ModelBase):
    """Raw output of one scanner run, linked to the scanner rules it matched.
    """

    upload = models.ForeignKey(
        FileUpload,
        related_name='scanners_results',
        on_delete=models.SET_NULL,
        null=True,
    )
    # Store the "raw" results of a scanner.
    results = JSONField(default=[])
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    version = models.ForeignKey(
        'versions.Version',
        related_name='scanners_results',
        on_delete=models.CASCADE,
        null=True,
    )
    has_matches = models.NullBooleanField()
    matched_rules = models.ManyToManyField(
        'ScannerRule', through='ScannerMatch'
    )

    class Meta:
        db_table = 'scanners_results'
        constraints = [
            models.UniqueConstraint(
                fields=('upload', 'scanner', 'version'),
                name='scanners_results_upload_id_scanner_'
                     'version_id_ad9eb8a6_uniq',
            )
        ]
        indexes = [models.Index(fields=('has_matches', ))]

    def add_yara_result(self, rule, tags=None, meta=None):
        """Append one Yara match to the raw results.

        Missing `tags` / `meta` are stored as an empty list / dict.
        """
        entry = {'rule': rule, 'tags': tags or [], 'meta': meta or {}}
        self.results.append(entry)

    def extract_rule_names(self):
        """Return the rule names found in the raw results.

        Yara results give a sorted list of unique names, Customs results
        give the stored `matchedRules` value, and any other scanner gives an
        empty list.
        """
        if self.scanner == YARA:
            unique_names = {result['rule'] for result in self.results}
            return sorted(unique_names)

        has_customs_rules = (
            self.scanner == CUSTOMS and 'matchedRules' in self.results
        )
        if has_customs_rules:
            return self.results['matchedRules']

        # We do not have support for the remaining scanners (yet).
        return []

    def save(self, *args, **kwargs):
        """Save the result and bind it to the matching `ScannerRule` rows.

        `has_matches` is recomputed on every save from the rules whose name
        appears in the raw results for this scanner.
        """
        rule_names = self.extract_rule_names()
        matched_rules = ScannerRule.objects.filter(
            scanner=self.scanner, name__in=rule_names
        )
        self.has_matches = bool(matched_rules)
        # The instance is saved before the rules are attached to it.
        super().save(*args, **kwargs)
        for scanner_rule in matched_rules:
            self.matched_rules.add(scanner_rule)
def run_scanner(results, upload_pk, scanner, api_url, api_key):
    """
    Run a remote scanner against a FileUpload and record what it matched.

    - `results` are the validation results handed along the validation
      chain. This task is part of that chain, so they must be the first
      argument, and they are returned unchanged.
    - `upload_pk` is the FileUpload ID.

    Failures are always logged and counted. They are re-raised unless the
    `ignore-exceptions-in-scanner-tasks` waffle switch is active.
    """
    scanner_name = SCANNERS.get(scanner)
    log.info('Starting scanner "%s" task for FileUpload %s.', scanner_name,
             upload_pk)

    is_webextension = results['metadata']['is_webextension']
    if not is_webextension:
        log.info(
            'Not running scanner "%s" for FileUpload %s, it is not a '
            'webextension.',
            scanner_name,
            upload_pk,
        )
        return results

    upload = FileUpload.objects.get(pk=upload_pk)

    try:
        if not os.path.exists(upload.path):
            raise ValueError('File "{}" does not exist.'.format(upload.path))

        scanner_result = ScannerResult(upload=upload, scanner=scanner)

        # Only the remote call is timed.
        with statsd.timer('devhub.{}'.format(scanner_name)):
            _run_scanner_for_url(
                scanner_result,
                upload.get_authenticated_download_url(),
                scanner,
                api_url,
                api_key,
            )

        scanner_result.save()

        if scanner_result.has_matches:
            statsd.incr('devhub.{}.has_matches'.format(scanner_name))
            # One counter per matched rule, keyed by the rule's ID.
            for matched_rule in scanner_result.matched_rules.all():
                rule_metric = 'devhub.{}.rule.{}.match'.format(
                    scanner_name, matched_rule.id)
                statsd.incr(rule_metric)

        statsd.incr('devhub.{}.success'.format(scanner_name))
        log.info('Ending scanner "%s" task for FileUpload %s.', scanner_name,
                 upload_pk)
    except Exception as exc:
        statsd.incr('devhub.{}.failure'.format(scanner_name))
        log.exception('Error in scanner "%s" task for FileUpload %s.',
                      scanner_name, upload_pk)
        swallow = waffle.switch_is_active(
            'ignore-exceptions-in-scanner-tasks')
        if not swallow:
            raise exc

    return results
class AbstractScannerRule(ModelBase):
    """Abstract base for a named scanner rule and the action bound to it."""

    name = models.CharField(
        max_length=200,
        help_text=_('This is the exact name of the rule used by a scanner.'),
    )
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    action = models.PositiveSmallIntegerField(
        choices=ACTIONS.items(), default=NO_ACTION
    )
    is_active = models.BooleanField(
        default=True,
        help_text=_(
            'When unchecked, the scanner results will not be bound to this '
            'rule and the action will not be executed.'
        ),
    )
    definition = models.TextField(null=True, blank=True)

    class Meta(ModelBase.Meta):
        abstract = True
        unique_together = ('name', 'scanner')

    def __str__(self):
        return self.name

    def clean(self):
        """Run the Yara-specific validation for Yara rules only."""
        if self.scanner == YARA:
            self.clean_yara()

    def clean_yara(self):
        """Validate the Yara `definition` of this rule.

        Raises a `ValidationError` keyed on `definition` when the definition
        is empty, does not contain `rule <name>`, appears to declare more
        than one rule, or cannot be compiled by yara.
        """
        if not self.definition:
            raise ValidationError(
                {'definition': _('Yara rules should have a definition')}
            )

        expected_header = 'rule {}'.format(self.name)
        if expected_header not in self.definition:
            raise ValidationError({
                'definition': _(
                    'The name of the rule in the definition should match '
                    'the name of the scanner rule'
                )
            })

        rule_headers = re.findall(r'rule\s+.+?\s+{', self.definition)
        if len(rule_headers) > 1:
            raise ValidationError({
                'definition': _(
                    'Only one Yara rule is allowed in the definition'
                )
            })

        try:
            yara.compile(source=self.definition)
        except yara.SyntaxError as syntax_error:
            message = _('The definition is not valid: %(error)s') % {
                'error': syntax_error,
            }
            raise ValidationError({'definition': message})
        except Exception:
            raise ValidationError({
                'definition': _(
                    'An error occurred when compiling the definition'
                )
            })
def get(self, request, format=None):
    """Validate the `label` and `scanner` query parameters, then delegate.

    Raises `ParseError` when either parameter is present but has an
    unsupported value.
    """
    params = self.request.query_params

    label = params.get('label', None)
    if not (label is None or label in [LABEL_BAD, LABEL_GOOD]):
        raise ParseError("invalid value for label")

    scanner = params.get('scanner', None)
    # The parameter carries a scanner *name*, i.e. a value of SCANNERS.
    if not (scanner is None or scanner in SCANNERS.values()):
        raise ParseError("invalid value for scanner")

    return super().get(request, format)
def run_scanner(upload_pk, scanner, api_url, api_key):
    """
    Run a scanner on a FileUpload via RPC and store the results.

    - `upload_pk` is the FileUpload ID.
    - `scanner` is the scanner identifier (a key of `SCANNERS`).
    - `api_url` / `api_key` are the scanner endpoint and its credential.

    Nothing is run for uploads whose path does not end with `.xpi`. Errors
    raised once the upload has been loaded are logged and counted, but never
    re-raised. The function always returns `None`.
    """
    scanner_name = SCANNERS.get(scanner)
    log.info('Starting scanner "%s" task for FileUpload %s.', scanner_name,
             upload_pk)

    upload = FileUpload.objects.get(pk=upload_pk)

    if not upload.path.endswith('.xpi'):
        log.info('Not running scanner "%s" for FileUpload %s, it is not a xpi '
                 'file.', scanner_name, upload_pk)
        return

    try:
        if not os.path.exists(upload.path):
            raise ValueError('File "{}" does not exist.'
                             .format(upload.path))

        result = ScannersResult()
        result.upload = upload
        result.scanner = scanner

        with statsd.timer('devhub.{}'.format(scanner_name)):
            json_payload = {
                'api_key': api_key,
                'download_url': upload.get_authenticated_download_url(),
            }
            response = requests.post(url=api_url,
                                     json=json_payload,
                                     timeout=settings.SCANNER_TIMEOUT)

        try:
            results = response.json()
        except ValueError:
            # Log the response body when JSON decoding has failed.
            raise ValueError(response.text)

        # A non-200 response is a failure even when its JSON body has no
        # 'error' key; without this check such a body would be stored as a
        # successful scanner result.
        if response.status_code != 200 or 'error' in results:
            raise ValueError(results)

        result.results = results
        result.save()

        statsd.incr('devhub.{}.success'.format(scanner_name))
        log.info('Ending scanner "%s" task for FileUpload %s.', scanner_name,
                 upload_pk)
    except Exception:
        statsd.incr('devhub.{}.failure'.format(scanner_name))
        # We log the exception but we do not raise to avoid perturbing the
        # submission flow.
        log.exception('Error in scanner "%s" task for FileUpload %s.',
                      scanner_name, upload_pk)
def get(self, request, format=None):
    """Serve the scanner results API when its waffle switch is enabled.

    Raises `Http404` while the `enable-scanner-results-api` switch is off,
    and `ParseError` when the `label` or `scanner` query parameter is
    present but has an unsupported value.
    """
    api_enabled = waffle.switch_is_active('enable-scanner-results-api')
    if not api_enabled:
        raise Http404

    params = self.request.query_params

    label = params.get('label', None)
    if not (label is None or label in [LABEL_BAD, LABEL_GOOD]):
        raise ParseError("invalid value for label")

    scanner = params.get('scanner', None)
    # The parameter carries a scanner *name*, i.e. a value of SCANNERS.
    if not (scanner is None or scanner in SCANNERS.values()):
        raise ParseError("invalid value for scanner")

    return super().get(request, format)
class ScannerRule(ModelBase):
    """A named rule of a given scanner, together with the action bound to it.

    A rule name is unique per scanner.
    """

    name = models.CharField(
        max_length=200,
        help_text=_('This is the exact name of the rule used by a scanner.'),
    )
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    action = models.PositiveSmallIntegerField(
        choices=ACTIONS.items(),
        default=NO_ACTION,
    )
    is_active = models.BooleanField(default=True)

    class Meta:
        db_table = 'scanners_rules'
        unique_together = ('name', 'scanner')

    def __str__(self):
        return self.name
def get_queryset(self):
    """Return scanner results annotated with a "good" or "bad" label.

    Only results attached to a version are considered, optionally narrowed
    to the scanner named by the `scanner` query parameter.

    - "bad" results are in the TRUE_POSITIVE state, or their version has a
      blocklist "block added" / "block edited" activity log entry.
    - "good" results have a "confirm auto approved" / "approve version"
      activity log entry not made by `settings.TASK_USER_ID`, and are not
      "bad".

    The `label` query parameter selects one of the two sets. Without it both
    are returned; an unrecognised value yields an empty queryset. Results are
    ordered by descending primary key.
    """
    params = self.request.query_params
    label = params.get('label', None)

    # The query parameter carries the scanner name; map it back to its key.
    requested_scanner = params.get('scanner')
    scanner = next(
        (key for key, name in SCANNERS.items() if name == requested_scanner),
        None,
    )

    results = ScannerResult.objects.exclude(version=None)
    if scanner:
        results = results.filter(scanner=scanner)

    blocklist_actions = (
        amo.LOG.BLOCKLIST_BLOCK_ADDED.id,
        amo.LOG.BLOCKLIST_BLOCK_EDITED.id,
    )
    bad_filters = Q(state=TRUE_POSITIVE) | Q(
        version__versionlog__activity_log__action__in=blocklist_actions
    )

    approval_actions = (
        amo.LOG.CONFIRM_AUTO_APPROVED.id,
        amo.LOG.APPROVE_VERSION.id,
    )
    # Both conditions stay in a single filter() call on purpose.
    good_filters = Q(
        version__versionlog__activity_log__action__in=approval_actions
    ) & ~Q(
        version__versionlog__activity_log__user_id=settings.TASK_USER_ID
    )

    good_results = (
        results.filter(good_filters)
        .exclude(bad_filters)
        .distinct()
        .annotate(label=Value(LABEL_GOOD, output_field=CharField()))
        .all()
    )
    bad_results = (
        results.filter(bad_filters)
        .distinct()
        .annotate(label=Value(LABEL_BAD, output_field=CharField()))
        .all()
    )

    if not label:
        queryset = good_results.union(bad_results)
    elif label == LABEL_GOOD:
        queryset = good_results
    elif label == LABEL_BAD:
        queryset = bad_results
    else:
        queryset = ScannerResult.objects.none()

    return queryset.order_by('-pk')
class ScannerResult(ModelBase):
    """Output of one scanner run: optional raw results plus matched rules."""

    upload = models.ForeignKey(
        FileUpload,
        related_name='scanners_results',
        on_delete=models.SET_NULL,
        null=True,
    )
    # Store the "raw" results of a scanner (optionally).
    results = JSONField(default=None)
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    version = models.ForeignKey(
        'versions.Version',
        related_name='scanners_results',
        on_delete=models.CASCADE,
        null=True,
    )
    matches = JSONField(default=[])
    has_matches = models.NullBooleanField()

    class Meta:
        db_table = 'scanners_results'
        constraints = [
            models.UniqueConstraint(
                fields=('upload', 'scanner', 'version'),
                name='scanners_results_upload_id_scanner_'
                     'version_id_ad9eb8a6_uniq',
            ),
        ]
        indexes = [
            models.Index(fields=('has_matches', )),
        ]

    def add_match(self, rule, tags=None, meta=None):
        """Record a matched rule and flag the result as having matches.

        Missing `tags` / `meta` are stored as an empty list / dict.
        """
        entry = {'rule': rule, 'tags': tags or [], 'meta': meta or {}}
        self.matches.append(entry)
        self.has_matches = True

    def save(self, *args, **kwargs):
        """Save the result, deriving `has_matches` when it is still unset."""
        if self.has_matches is None:
            self.has_matches = bool(self.matches)
        super().save(*args, **kwargs)

    @property
    def matched_rules(self):
        """Sorted list of the unique rule names found in `matches`."""
        rule_names = set()
        for match in self.matches:
            rule_names.add(match['rule'])
        return sorted(rule_names)
class ScannersResult(ModelBase):
    """Results of one scanner, tied to a FileUpload and/or a Version.

    The (upload, scanner, version) combination is unique.
    """

    upload = models.ForeignKey(
        FileUpload,
        related_name='scanners_results',
        on_delete=models.SET_NULL,
        null=True,
    )
    results = JSONField(default={})
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    version = models.ForeignKey(
        'versions.Version',
        related_name='scanners_results',
        on_delete=models.CASCADE,
        null=True,
    )

    class Meta:
        db_table = 'scanners_results'
        constraints = [
            models.UniqueConstraint(
                fields=('upload', 'scanner', 'version'),
                name='scanners_results_upload_id_scanner_'
                     'version_id_ad9eb8a6_uniq',
            ),
        ]
def test_scanner_choices(self):
    """The model's `scanner` field must offer exactly the SCANNERS items."""
    scanner_field = self.model._meta.get_field('scanner')
    expected_choices = SCANNERS.items()
    assert scanner_field.choices == expected_choices
def get_scanner_name(self):
    """Return the SCANNERS entry for this object's scanner, else `None`."""
    scanner_id = self.scanner
    return SCANNERS.get(scanner_id)
class AbstractScannerResult(ModelBase):
    """Abstract base for the stored output of a scanner run.

    Concrete subclasses are expected to declare a `matched_rules` relation:
    `save()` and `run_action()` both look that field up to find the rule
    model.
    """

    # Store the "raw" results of a scanner.
    results = JSONField(default=[])
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    has_matches = models.NullBooleanField()
    state = models.PositiveSmallIntegerField(
        choices=RESULT_STATES.items(), null=True, blank=True, default=UNKNOWN
    )
    version = models.ForeignKey(
        'versions.Version',
        related_name="%(class)ss",
        on_delete=models.CASCADE,
        null=True,
    )

    class Meta(ModelBase.Meta):
        abstract = True
        indexes = [
            models.Index(fields=('has_matches',)),
            models.Index(fields=('state',)),
        ]

    def add_yara_result(self, rule, tags=None, meta=None):
        """This method is used to store a Yara result.

        Missing `tags` / `meta` are stored as an empty list / dict.
        """
        self.results.append(
            {'rule': rule, 'tags': tags or [], 'meta': meta or {}}
        )

    def extract_rule_names(self):
        """This method parses the raw results and returns the (matched) rule
        names. Not all scanners have rules that necessarily match.

        Yara results give a sorted list of unique names, Customs results
        give the stored `matchedRules` value, and any other scanner gives an
        empty list.
        """
        if self.scanner == YARA:
            return sorted({result['rule'] for result in self.results})
        if self.scanner == CUSTOMS and 'matchedRules' in self.results:
            return self.results['matchedRules']
        # We do not have support for the remaining scanners (yet).
        return []

    def save(self, *args, **kwargs):
        """Save the result and bind it to the active rules it matched.

        `has_matches` is recomputed on every save from the active rules
        whose name appears in the raw results for this scanner.
        """
        rule_model = self._meta.get_field('matched_rules').related_model
        matched_rules = rule_model.objects.filter(
            scanner=self.scanner,
            name__in=self.extract_rule_names(),
            # See: https://github.com/mozilla/addons-server/issues/13143
            is_active=True,
        )
        self.has_matches = bool(matched_rules)
        # Save the instance first...
        super().save(*args, **kwargs)
        # ...then add the associated rules.
        for scanner_rule in matched_rules:
            self.matched_rules.add(scanner_rule)

    def get_scanner_name(self):
        """Return the SCANNERS entry for this result's scanner, else None."""
        return SCANNERS.get(self.scanner)

    def get_pretty_results(self):
        """Return the raw results as JSON indented by two spaces."""
        return json.dumps(self.results, indent=2)

    def get_files_by_matched_rules(self):
        """Return a mapping of rule identifier to the filenames it matched.

        For Yara, each raw result contributes its `meta['filename']`
        (`'???'` when absent) under its rule name. For Customs, every rule
        of `scanMap` flagged with `RULE_HAS_MATCHED` contributes the file it
        was reported for. Other scanners give an empty mapping.
        """
        res = defaultdict(list)
        # Scanner identifiers are integers and must be compared by value:
        # an identity (`is`) check only works thanks to CPython's small
        # integer cache.
        if self.scanner == YARA:
            for item in self.results:
                res[item['rule']].append(item['meta'].get('filename', '???'))
        elif self.scanner == CUSTOMS:
            scan_map = self.results.get('scanMap', {})
            for filename, rules in scan_map.items():
                for rule_id, data in rules.items():
                    if data.get('RULE_HAS_MATCHED', False):
                        res[rule_id].append(filename)
        return res

    def can_report_feedback(self):
        """Truthy when the result has matches, is still in the UNKNOWN state
        and does not come from the WAT scanner."""
        return (
            self.has_matches and self.state == UNKNOWN and self.scanner != WAT
        )

    def can_revert_feedback(self):
        """Truthy when the result has matches, has left the UNKNOWN state
        and does not come from the WAT scanner."""
        return (
            self.has_matches and self.state != UNKNOWN and self.scanner != WAT
        )

    def get_git_repository(self):
        """Return the configured git repository for the Customs or Yara
        scanner, or `None` for any other scanner."""
        return {
            CUSTOMS: settings.CUSTOMS_GIT_REPOSITORY,
            YARA: settings.YARA_GIT_REPOSITORY,
        }.get(self.scanner)

    @classmethod
    def run_action(cls, version):
        """Try to find and execute an action for a given version, based on the
        scanner results and associated rules.

        If an action is found, it is run synchronously from this method, not
        in a task.

        Raises `Exception` when the selected rule carries an action that is
        unknown to `ACTIONS` or that has no implementation.
        """
        log.info('Checking rules and actions for version %s.', version.pk)

        rule_model = cls.matched_rules.rel.model
        result_query_name = cls._meta.get_field(
            'matched_rules').related_query_name()

        rule = (
            rule_model.objects.filter(**{
                f'{result_query_name}__version': version,
                'is_active': True,
            })
            .order_by(
                # The `-` sign means descending order, so the rule with the
                # highest action value is picked.
                '-action'
            )
            .first()
        )

        if not rule:
            log.info('No action to execute for version %s.', version.pk)
            return

        action_id = rule.action
        action_name = ACTIONS.get(action_id, None)

        if not action_name:
            raise Exception("invalid action %s" % action_id)

        ACTION_FUNCTIONS = {
            NO_ACTION: _no_action,
            FLAG_FOR_HUMAN_REVIEW: _flag_for_human_review,
            DELAY_AUTO_APPROVAL: _delay_auto_approval,
            DELAY_AUTO_APPROVAL_INDEFINITELY: (
                _delay_auto_approval_indefinitely),
        }

        action_function = ACTION_FUNCTIONS.get(action_id, None)

        if not action_function:
            raise Exception("no implementation for action %s" % action_id)

        # We have a valid action to execute, so let's do it!
        log.info(
            'Starting action "%s" for version %s.', action_name, version.pk)
        action_function(version)
        log.info('Ending action "%s" for version %s.', action_name, version.pk)
class AbstractScannerRule(ModelBase):
    """Abstract base for a named scanner rule and the action bound to it."""

    name = models.CharField(
        max_length=200,
        help_text=_('This is the exact name of the rule used by a scanner.'),
    )
    scanner = models.PositiveSmallIntegerField(choices=SCANNERS.items())
    action = models.PositiveSmallIntegerField(
        choices=ACTIONS.items(), default=NO_ACTION
    )
    is_active = models.BooleanField(
        default=True,
        help_text=_(
            'When unchecked, the scanner results will not be bound to this '
            'rule and the action will not be executed.'
        ),
    )
    definition = models.TextField(null=True, blank=True)

    class Meta(ModelBase.Meta):
        abstract = True
        unique_together = ('name', 'scanner')

    @classmethod
    def get_yara_externals(cls):
        """
        Return the external variables handed to yara when a rule is
        compiled, mapped to their default values.
        """
        externals = {
            'is_json_file': False,
            'is_manifest_file': False,
            'is_locale_file': False,
        }
        return externals

    def __str__(self):
        return self.name

    def clean(self):
        """Run the Yara-specific validation for Yara rules only."""
        if self.scanner == YARA:
            self.clean_yara()

    def clean_yara(self):
        """Validate the Yara `definition` of this rule.

        Raises a `ValidationError` keyed on `definition` when the definition
        is empty, does not contain `rule <name>`, appears to declare more
        than one rule, or cannot be compiled by yara with the default
        externals.
        """
        if not self.definition:
            raise ValidationError(
                {'definition': _('Yara rules should have a definition')}
            )

        expected_header = f'rule {self.name}'
        if expected_header not in self.definition:
            raise ValidationError({
                'definition': _(
                    'The name of the rule in the definition should match '
                    'the name of the scanner rule'
                )
            })

        rule_headers = re.findall(r'rule\s+.+?\s+{', self.definition)
        if len(rule_headers) > 1:
            raise ValidationError({
                'definition': _(
                    'Only one Yara rule is allowed in the definition'
                )
            })

        try:
            yara.compile(
                source=self.definition,
                externals=self.get_yara_externals(),
            )
        except yara.SyntaxError as syntax_error:
            message = _('The definition is not valid: %(error)s') % {
                'error': syntax_error
            }
            raise ValidationError({'definition': message})
        except Exception:
            raise ValidationError({
                'definition': _(
                    'An error occurred when compiling the definition'
                )
            })