async def test_alert_instances(self):
    """Check that each alert instance in the report is measured as a separate entity."""
    response = await self.collect(get_request_text=self.OWASP_ZAP_XML)
    picture_urls = (
        "http://www.hackazon.com/products_pictures/Ray_Ban.jpg",
        "http://www.hackazon.com/products_pictures/How_to_Marry_a_Millionaire.jpg",
    )
    # Both instances are expected to differ only in their URL, so build them from one template.
    # The old key is the same string as the key, minus the leading alert text.
    expected_entities = [
        {
            "key": md5_hash(f"X-Content-Type-Options Header Missing:10021:16:15:3:GET:{picture_url}"),
            "old_key": md5_hash(f"10021:16:15:3:GET:{picture_url}"),
            "name": self.WARNING_NAME,
            "description": self.WARNING_DESCRIPTION,
            "location": f"GET {picture_url}",
            "uri": picture_url,
            "risk": self.WARNING_RISK,
        }
        for picture_url in picture_urls
    ]
    self.assert_measurement(response, value="2", entities=expected_entities)
async def _parse_source_responses(self, responses: SourceResponses) -> SourceMeasurement:
    """Create one entity per alert instance found in the responses.

    The alerts are obtained from ``self.__alerts`` using the "risks" parameter. Entities are collected in a
    dict indexed by their key, so instances that end up with the same key are reported only once.
    """
    markup_tag_pattern = re.compile(r"<[^>]*>")
    id_tags = ("alert", "pluginid", "cweid", "wascid", "sourceid")
    selected_risks = cast(List[str], self._parameter("risks"))
    entities_by_key: Dict[str, Entity] = {}
    for alert in await self.__alerts(responses, selected_risks):
        id_values = [alert.findtext(id_tag, default="") for id_tag in id_tags]
        # The old key leaves out the first id (the text of the "alert" element).
        key_prefix = ":".join(id_values)
        old_key_prefix = ":".join(id_values[1:])
        alert_name = alert.findtext("name", default="")
        # Remove anything that looks like a markup tag from the description.
        alert_description = markup_tag_pattern.sub("", alert.findtext("desc", default=""))
        alert_risk = alert.findtext("riskdesc", default="")
        for instance in alert.findall("./instances/instance"):
            http_method = instance.findtext("method", default="")
            stable_uri = self.__stable(hashless(URL(instance.findtext("uri", default=""))))
            entity_key = md5_hash(f"{key_prefix}:{http_method}:{stable_uri}")
            entities_by_key[entity_key] = Entity(
                key=entity_key,
                old_key=md5_hash(f"{old_key_prefix}:{http_method}:{stable_uri}"),
                name=alert_name,
                description=alert_description,
                uri=stable_uri,
                location=f"{http_method} {stable_uri}",
                risk=alert_risk,
            )
    return SourceMeasurement(entities=list(entities_by_key.values()))
def __alert_instance_entity(self, ids, entity_kwargs, alert_instance) -> Entity:
    """Build the entity for one alert instance.

    The key hashes all ids plus the instance's method and stabilized URI; the old key does the same but
    skips the first id. The remaining entity attributes are taken from ``entity_kwargs`` unchanged.
    """
    http_method = alert_instance.findtext("method", default="")
    raw_uri = alert_instance.findtext("uri", default="")
    stable_uri = self.__stable_url(hashless(URL(raw_uri)))
    instance_suffix = f"{http_method}:{stable_uri}"
    return Entity(
        key=md5_hash(f"{':'.join(ids)}:{instance_suffix}"),
        old_key=md5_hash(f"{':'.join(ids[1:])}:{instance_suffix}"),
        uri=stable_uri,
        location=f"{http_method} {stable_uri}",
        **entity_kwargs,
    )
async def test_variable_url_regexp(self):
    """Check that the variable parts of URLs are replaced, merging the instances into one entity."""
    source_parameters = self.sources["source_id"]["parameters"]
    source_parameters["variable_url_regexp"] = ["[A-Za-z_]+.jpg"]
    metric = {"type": "security_warnings", "addition": "sum", "sources": self.sources}
    response = await self.collect(metric, get_request_text=self.xml)
    stable_url = "http://www.hackazon.com/products_pictures/variable-part-removed"
    expected_entity = {
        "key": md5_hash(f"X-Content-Type-Options Header Missing:10021:16:15:3:GET:{stable_url}"),
        "old_key": md5_hash(f"10021:16:15:3:GET:{stable_url}"),
        "name": self.warning_name,
        "uri": stable_url,
        "description": self.warning_description,
        "location": f"GET {stable_url}",
        "risk": self.warning_risk,
    }
    self.assert_measurement(response, value="1", entities=[expected_entity])
async def _parse_source_responses(self, responses: SourceResponses) -> SourceMeasurement:
    """Create an entity for each violation node whose impact is one of the "impact" parameter values.

    Each response is parsed as JSON. The entity key is a hash of the comma-joined attribute values, so the
    order of the attributes below must not change.
    """
    accepted_impacts = self._parameter("impact")
    entities = []
    for response in responses:
        report = await response.json(content_type=None)
        page_url = report["url"]
        for violation in report.get("violations", []):
            for node in violation.get("nodes", []):
                impact = node.get("impact")
                if impact in accepted_impacts:
                    attributes = {
                        "description": violation.get("description"),
                        "element": node.get("html"),
                        "help": violation.get("helpUrl"),
                        "impact": impact,
                        "page": page_url,
                        "url": page_url,
                        "violation_type": violation.get("id"),
                    }
                    entity_key = md5_hash(",".join(map(str, attributes.values())))
                    entities.append(Entity(key=entity_key, **attributes))
    return SourceMeasurement(entities=entities)
async def test_warnings(self):
    """Check that every security warning in the report is returned as an entity."""
    metric = {"type": "security_warnings", "addition": "sum", "sources": self.sources}
    response = await self.collect(metric, get_request_text=self.xml)
    picture_urls = (
        "http://www.hackazon.com/products_pictures/Ray_Ban.jpg",
        "http://www.hackazon.com/products_pictures/How_to_Marry_a_Millionaire.jpg",
    )
    # The two expected warnings only differ in the URL they were found on.
    expected_entities = [
        {
            "key": md5_hash(f"X-Content-Type-Options Header Missing:10021:16:15:3:GET:{picture_url}"),
            "old_key": md5_hash(f"10021:16:15:3:GET:{picture_url}"),
            "name": self.warning_name,
            "description": self.warning_description,
            "location": f"GET {picture_url}",
            "uri": picture_url,
            "risk": self.warning_risk,
        }
        for picture_url in picture_urls
    ]
    self.assert_measurement(response, value="2", entities=expected_entities)
def setUp(self):
    """Prepare the CSV fixture, the metric, and the entities expected from the CSV."""
    super().setUp()
    self.header_row = "URL,Violation Type,Impact,Help,HTML Element,Messages,DOM Element\n"
    self.serious_violation = "url1,aria-input-field-name,serious,help1,html1\n"
    self.moderate_violation = "url2,aria-hidden-focus,moderate,help2,html2,messages2,dom2\n"
    self.csv = "".join((self.header_row, self.serious_violation, self.moderate_violation))
    self.metric = {
        "type": "accessibility",
        "addition": "sum",
        "sources": {"source_id": {"type": "axecsv", "parameters": {"url": "https://axecsv"}}},
    }
    # The attribute order matters: the expected key is a hash over the values in exactly this order.
    attribute_names = ("url", "violation_type", "impact", "element", "page", "description", "help")
    attribute_rows = (
        # The serious violation row stops after the HTML element column, hence the None values.
        ("url1", "aria-input-field-name", "serious", None, "url1", None, "help1"),
        ("url2", "aria-hidden-focus", "moderate", "dom2", "url2", "messages2", "help2"),
    )
    self.expected_entities = [dict(zip(attribute_names, row)) for row in attribute_rows]
    for expected_entity in self.expected_entities:
        expected_entity["key"] = md5_hash(",".join(map(str, expected_entity.values())))
def setUp(self):
    """Extend to create the CSV report and the entities expected to be parsed from it."""
    super().setUp()
    self.header_row = "URL,Violation Type,Impact,Help,HTML Element,Messages,DOM Element\n"
    self.serious_violation = "url1,aria-input-field-name,serious,help1,html1\n"
    self.moderate_violation = "url2,aria-hidden-focus,moderate,help2,html2,messages2,dom2\n"
    self.csv = f"{self.header_row}{self.serious_violation}{self.moderate_violation}"
    # The serious violation row stops after the HTML element column, so element and description are None.
    serious_entity = dict(
        url="url1",
        violation_type="aria-input-field-name",
        impact="serious",
        element=None,
        page="url1",
        description=None,
        help="help1",
    )
    moderate_entity = dict(
        url="url2",
        violation_type="aria-hidden-focus",
        impact="moderate",
        element="dom2",
        page="url2",
        description="messages2",
        help="help2",
    )
    self.expected_entities = [serious_entity, moderate_entity]
    for expected_entity in self.expected_entities:
        # Collect the values first so the key itself is not part of the hashed text.
        hashed_values = [str(value) for value in expected_entity.values()]
        expected_entity["key"] = md5_hash(",".join(hashed_values))
async def _parse_source_responses(self, responses: SourceResponses) -> SourceMeasurement:
    """Create an entity for each vulnerability whose severity is one of the "severities" parameter values.

    Responses whose JSON is not a dict are skipped. The entity key is a hash of the vulnerability id and the
    package include path.
    """
    selected_severities = self._parameter("severities")
    entities = []
    for response in responses:
        report = await response.json(content_type=None)
        if not isinstance(report, dict):
            continue
        for vulnerability in report.get("vulnerabilities", []):
            if vulnerability["severity"].lower() not in selected_severities:
                continue
            dependency_path = vulnerability["from"]
            if isinstance(dependency_path, list):
                # Skip the first element of the path and show the rest as a chain.
                package_include = " ➜ ".join(str(package) for package in dependency_path[1:])
            else:
                package_include = dependency_path
            fixed_in = vulnerability["fixedIn"]
            if isinstance(fixed_in, list):
                fix = ", ".join(str(package) for package in fixed_in)
            else:
                fix = fixed_in
            vulnerability_id = vulnerability["id"]
            entities.append(
                Entity(
                    key=md5_hash(f"{vulnerability_id}:{package_include}"),
                    cve=vulnerability["title"],
                    package=vulnerability["packageName"],
                    severity=vulnerability["severity"],
                    version=vulnerability["version"],
                    package_include=package_include,
                    fix=fix,
                    url=f"https://snyk.io/vuln/{vulnerability_id}",
                )
            )
    return SourceMeasurement(entities=entities)
async def test_variable_url_regexp(self):
    """Check that the variable parts of URLs are replaced, merging the instances into one entity."""
    self.set_source_parameter("variable_url_regexp", ["[A-Za-z_]+.jpg"])
    response = await self.collect(get_request_text=self.OWASP_ZAP_XML)
    stable_url = "http://www.hackazon.com/products_pictures/variable-part-removed"
    expected_entity = {
        "key": md5_hash(f"X-Content-Type-Options Header Missing:10021:16:15:3:GET:{stable_url}"),
        "old_key": md5_hash(f"10021:16:15:3:GET:{stable_url}"),
        "name": self.WARNING_NAME,
        "uri": stable_url,
        "description": self.WARNING_DESCRIPTION,
        "location": f"GET {stable_url}",
        "risk": self.WARNING_RISK,
    }
    self.assert_measurement(response, value="1", entities=[expected_entity])
def _create_entity(vulnerability: dict[str, str], filename: str) -> Entity:
    """Turn one vulnerability from the report named `filename` into an entity."""
    cve = vulnerability["vuln"]
    package = vulnerability["package"]
    # The filename is part of the hashed text so the key stays unique when several images contain the same
    # package with the same vulnerability. It is prepended without a separating colon on purpose: for a
    # report that is not zipped the filename is an empty string, and existing hashes then stay the same.
    entity_key = md5_hash(f"{filename}{cve}:{package}")
    return Entity(
        key=entity_key,
        cve=cve,
        filename=filename,
        package=package,
        severity=vulnerability["severity"],
        fix=vulnerability["fix"],
        url=vulnerability["url"],
    )
def setUp(self):
    """Create the JSON report, the metric, and the expected entities used by the tests."""
    super().setUp()
    self.tested_url = "https://tested_url"
    serious_violation = {
        "id": "aria-input-field-name",
        "description": "description1",
        "helpUrl": "https://help1",
        "tags": ["cat.color", "wcag2aa", "wcag143"],
        "nodes": [{"impact": "serious", "html": "html1"}],
    }
    # The second violation deliberately has no tags.
    moderate_violation = {
        "id": "aria-hidden-focus",
        "description": "description2",
        "helpUrl": "https://help2",
        "nodes": [{"impact": "moderate", "html": "html2"}],
    }
    self.json = {"url": self.tested_url, "violations": [serious_violation, moderate_violation]}
    self.metric = {"type": "accessibility", "addition": "sum", "sources": self.sources}
    self.expected_entities = [
        dict(
            description="description1",
            element="html1",
            help="https://help1",
            impact="serious",
            page=self.tested_url,
            url=self.tested_url,
            violation_type="aria-input-field-name",
            tags="cat.color, wcag143, wcag2aa",
        ),
        dict(
            description="description2",
            element="html2",
            help="https://help2",
            impact="moderate",
            page=self.tested_url,
            url=self.tested_url,
            violation_type="aria-hidden-focus",
            tags="",
        ),
    ]
    for expected_entity in self.expected_entities:
        # The tags are left out of the text that is hashed into the expected key.
        hashed_values = [str(value) for name, value in expected_entity.items() if name != "tags"]
        expected_entity["key"] = md5_hash(",".join(hashed_values))
async def test_alert_types(self):
    """Check that alert types are counted when the "alerts" parameter is set to "alert types"."""
    self.set_source_parameter("alerts", "alert types")
    response = await self.collect(get_request_text=self.OWASP_ZAP_XML)
    # An alert type entity has no location, URI, or old key; its key only hashes the alert ids.
    alert_type_entity = {
        "key": md5_hash("X-Content-Type-Options Header Missing:10021:16:15:3"),
        "name": self.WARNING_NAME,
        "description": self.WARNING_DESCRIPTION,
        "risk": self.WARNING_RISK,
    }
    self.assert_measurement(response, value="1", entities=[alert_type_entity])
async def _parse_source_responses(
    self, responses: SourceResponses
) -> SourceMeasurement:  # skipcq: PY-D0003
    """Create an entity for every vulnerability listed in the JSON responses.

    The entity key is a hash of the vulnerability's title and description.
    """

    def as_entity(vulnerability) -> Entity:
        """Convert one vulnerability dict into an entity."""
        title = vulnerability["title"]
        description = vulnerability["description"]
        return Entity(
            key=md5_hash(f"{title}:{description}"),
            title=title,
            description=description,
            severity=vulnerability["severity"],
        )

    entities = []
    for response in responses:
        report = await response.json(content_type=None)
        entities.extend(as_entity(vulnerability) for vulnerability in report.get("vulnerabilities", []))
    return SourceMeasurement(entities=entities)
async def _parse_entities(self, responses: SourceResponses) -> Entities:
    """Override to create one entity for each row of the parsed CSV."""

    def attributes_of(row) -> dict:
        """Map the CSV columns of one row onto entity attributes."""
        # The key is a hash over these values in this order, so keep the attribute order as is.
        return {
            "url": str(row["URL"]),
            "violation_type": row["Violation Type"],
            "impact": row["Impact"],
            "element": row["DOM Element"],
            # The page is the URL with the scheme and host part removed.
            "page": re.sub(r"http[s]?://[^/]+", "", row["URL"]),
            "description": row["Messages"],
            "help": row["Help"],
        }

    rows = await self.__parse_csv(responses)
    attributes_per_row = [attributes_of(row) for row in rows]
    return Entities(
        Entity(key=md5_hash(",".join(map(str, attributes.values()))), **attributes)
        for attributes in attributes_per_row
    )
async def _parse_entities(self, responses: SourceResponses) -> Entities:
    """Override to turn every vulnerability in the JSON responses into an entity.

    The entity key is a hash of the vulnerability's title and description.
    """
    entities = Entities()
    for response in responses:
        report = await response.json(content_type=None)
        for vulnerability in report.get("vulnerabilities", []):
            title, description = vulnerability["title"], vulnerability["description"]
            entity = Entity(
                key=md5_hash(f"{title}:{description}"),
                title=title,
                description=description,
                severity=vulnerability["severity"],
            )
            entities.append(entity)
    return entities
async def _parse_source_responses(self, responses: SourceResponses) -> SourceMeasurement:
    """Create one entity for each row of the parsed CSV.

    The entity key is a hash of the comma-joined attribute values, so the attribute order is significant.
    """
    attributes_per_row = []
    for row in await self.__parse_csv(responses):
        attributes_per_row.append(
            {
                "url": str(row["URL"]),
                "violation_type": row["Violation Type"],
                "impact": row["Impact"],
                "element": row["DOM Element"],
                # The page is the URL with the scheme and host part removed.
                "page": re.sub(r"http[s]?://[^/]+", "", row["URL"]),
                "description": row["Messages"],
                "help": row["Help"],
            }
        )
    entities = []
    for attributes in attributes_per_row:
        entity_key = md5_hash(",".join(map(str, attributes.values())))
        entities.append(Entity(key=entity_key, **attributes))
    return SourceMeasurement(entities=entities)
async def test_warnings(self):
    """Check that the vulnerability in the JSON report is returned as one security warning."""
    response = await self.collect(get_request_json_return_value=self.vulnerabilities_json)
    # The filename is empty here, so it adds nothing to the text hashed into the key.
    expected_entity = {
        "key": md5_hash("CVE-000:package"),
        "filename": "",
        "cve": "CVE-000",
        "url": self.url,
        "fix": "None",
        "severity": "Low",
        "package": "package",
    }
    self.assert_measurement(response, value="1", entities=[expected_entity])
async def test_embedded_newlines(self):
    """Check that a newline inside a quoted CSV field does not split the row in two."""
    violation_with_newline = 'url3,aria-hidden-focus,moderate,help3,html3,"messages3\nsecond line",dom3\n'
    response = await self.collect(get_request_text=self.csv + violation_with_newline)
    expected_entity = dict(
        url="url3",
        violation_type="aria-hidden-focus",
        impact="moderate",
        element="dom3",
        page="url3",
        description="messages3\nsecond line",
        help="help3",
    )
    # Compute the key before adding it, so the key itself is not part of the hashed text.
    expected_key = md5_hash(",".join(map(str, expected_entity.values())))
    expected_entity["key"] = expected_key
    self.assert_measurement(response, value="3", entities=self.expected_entities + [expected_entity])
async def _parse_source_responses(self, responses: SourceResponses) -> SourceMeasurement:
    """Create an entity for each vulnerability whose severity is one of the "severities" parameter values.

    Responses whose JSON is not a dict are skipped. The entity key is a hash of the vulnerability identifier
    and the package.
    """
    selected_severities = self._parameter("severities")
    entities = []
    for response in responses:
        report = await response.json(content_type=None)
        if not isinstance(report, dict):
            continue
        for vulnerability in report.get("vulnerabilities", []):
            if vulnerability["severity"] not in selected_severities:
                continue
            cve = vulnerability["vuln"]
            package = vulnerability["package"]
            entities.append(
                Entity(
                    key=md5_hash(f"{cve}:{package}"),
                    cve=cve,
                    package=package,
                    severity=vulnerability["severity"],
                    fix=vulnerability["fix"],
                    url=vulnerability["url"],
                )
            )
    return SourceMeasurement(entities=entities)
async def test_embedded_newlines(self):
    """Check that a newline inside a quoted CSV field does not split the row in two."""
    violation_with_newline = 'url3,aria-hidden-focus,moderate,help3,html3,"messages3\nsecond line",dom3\n'
    report_text = self.csv + violation_with_newline
    expected_entity = dict(
        url="url3",
        violation_type="aria-hidden-focus",
        impact="moderate",
        element="dom3",
        page="url3",
        description="messages3\nsecond line",
        help="help3",
    )
    # Compute the key before adding it, so the key itself is not part of the hashed text.
    hashed_values = [str(value) for value in expected_entity.values()]
    expected_entity["key"] = md5_hash(",".join(hashed_values))
    response = await self.collect(self.metric, get_request_text=report_text)
    self.assert_measurement(response, value="3", entities=self.expected_entities + [expected_entity])
async def test_zipped_report(self):
    """Check that the reports are read from a zip file and the filename ends up in the entity."""
    self.set_source_parameter("url", "anchore.zip")
    filename = "vuln.json"
    vulnerabilities_report = (filename, json.dumps(self.vulnerabilities_json))
    details_report = ("details.json", json.dumps(self.details_json))
    zipped_reports = self.zipped_report(vulnerabilities_report, details_report)
    response = await self.collect(get_request_content=zipped_reports)
    # For a zipped report the filename is prepended to the text hashed into the key.
    expected_entity = {
        "key": md5_hash(f"{filename}CVE-000:package"),
        "filename": filename,
        "cve": "CVE-000",
        "url": self.url,
        "fix": "None",
        "severity": "Low",
        "package": "package",
    }
    self.assert_measurement(response, value="1", entities=[expected_entity])
def setUp(self):
    """Create the JSON report, the metric, and the expected entities used by the tests."""
    super().setUp()
    self.tested_url = "https://tested_url"
    serious_violation = {
        "id": "aria-input-field-name",
        "description": "description1",
        "helpUrl": "https://help1",
        "nodes": [{"impact": "serious", "html": "html1"}],
    }
    moderate_violation = {
        "id": "aria-hidden-focus",
        "description": "description2",
        "helpUrl": "https://help2",
        "nodes": [{"impact": "moderate", "html": "html2"}],
    }
    self.json = {"url": self.tested_url, "violations": [serious_violation, moderate_violation]}
    self.metric = {"type": "accessibility", "addition": "sum", "sources": self.sources}
    # The attribute order matters: the expected key is a hash over the values in exactly this order.
    self.expected_entities = [
        dict(
            description="description1",
            element="html1",
            help="https://help1",
            impact="serious",
            page=self.tested_url,
            url=self.tested_url,
            violation_type="aria-input-field-name",
        ),
        dict(
            description="description2",
            element="html2",
            help="https://help2",
            impact="moderate",
            page=self.tested_url,
            url=self.tested_url,
            violation_type="aria-hidden-focus",
        ),
    ]
    for expected_entity in self.expected_entities:
        hashed_values = [str(value) for value in expected_entity.values()]
        expected_entity["key"] = md5_hash(",".join(hashed_values))
def __alert_type_entity(ids, entity_kwargs) -> Entity:
    """Build the entity for an alert type; its key is the hash of the colon-joined ids."""
    joined_ids = ":".join(ids)
    entity_key = md5_hash(joined_ids)
    return Entity(key=entity_key, **entity_kwargs)
def __create_key(attributes) -> str:
    """Return the entity key: a hash of the comma-joined attribute values, tags excluded."""
    # Tags are excluded for two reasons: 1) identical violations should have identical tags anyway, and
    # 2) tags were added to the entities later, so hashing them would change the keys of existing entities.
    hashed_values = [str(value) for name, value in attributes.items() if name != "tags"]
    return md5_hash(",".join(hashed_values))