def test_adv_record_products():
    record = AdvisoryRecord(vulnerability_id="CVE-XXXX-YYYY",
                            description=ADVISORY_TEXT)
    record.analyze()

    # print(record)
    assert "Chrysler" in record.affected_products
def test_adv_record_versions():

    record = AdvisoryRecord(vulnerability_id="CVE-2014-0050",
                            description=ADVISORY_TEXT)
    record.analyze()

    assert "15.26.1" in record.versions
    assert "15.26" not in record.versions
def test_adv_record_code_tokens():
    record = AdvisoryRecord(vulnerability_id="CVE-XXXX-YYYY",
                            description=ADVISORY_TEXT_2)
    record.analyze()

    assert record.code_tokens == (
        "IO",
        "2.7,",
        "FileNameUtils.normalize",
        '"//../foo",',
        '"\\..\\foo",',
        '"limited"',
    )
Example #4
def test_extract_referred_to_by_pages_linked_from_advisories(repository, requests_mock):
    requests_mock.get(
        "https://for.testing.purposes/containing_commit_id_in_text_2",
        text="some text r97993e3d78e1f5389b7b172ba9f308440830ce5 blah",
    )

    advisory_record = AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        references=["https://for.testing.purposes/containing_commit_id_in_text_2"],
    )

    commit = Commit(
        commit_id="r97993e3d78e1f5389b7b172ba9f308440830ce5",
        repository="test_repository",
    )
    assert extract_referred_to_by_pages_linked_from_advisories(
        commit, advisory_record
    ) == {
        "https://for.testing.purposes/containing_commit_id_in_text_2",
    }

    commit = Commit(
        commit_id="f4d2eabd921cbd8808b9d923ee63d44538b4154f",
        repository="test_repository",
    )
    assert (
        extract_referred_to_by_pages_linked_from_advisories(commit, advisory_record)
        == set()
    )
Example #5
def test_extract_other_CVE_in_message():
    commit = Commit(
        commit_id="test_commit",
        repository="test_repository",
        cve_refs=["CVE-2021-29425", "CVE-2021-21251"],
    )
    advisory_record = AdvisoryRecord(vulnerability_id="CVE-2020-31284")
    assert extract_other_CVE_in_message(commit, advisory_record) == {
        "CVE-2021-29425",
        "CVE-2021-21251",
    }
    advisory_record = AdvisoryRecord(vulnerability_id="CVE-2021-29425")
    result = extract_other_CVE_in_message(commit, advisory_record)
    assert result == {
        "CVE-2021-21251",
    }
Example #6
def test_same_path_only(paths):
    commit = Commit(
        commit_id="test_commit", repository="test_repository", changed_files=paths
    )
    advisory_record = AdvisoryRecord(
        vulnerability_id="test_advisory_record", paths=paths[:2]
    )
    assert extract_changed_relevant_paths(commit, advisory_record) == set(paths[:2])
Example #7
def test_time_between_commit_and_advisory_record():
    commit = Commit(
        commit_id="test_commit", repository="test_repository", timestamp=142
    )
    advisory_record = AdvisoryRecord(
        vulnerability_id="test_advisory_record", published_timestamp=100
    )
    assert (
        extract_time_between_commit_and_advisory_record(commit, advisory_record) == 42
    )
Example #8
def test_no_match(paths):
    commit = Commit(
        commit_id="test_commit",
        repository="test_repository",
        changed_files=paths[:1],
    )
    advisory_record = AdvisoryRecord(
        vulnerability_id="test_advisory_record", paths=paths[2:]
    )
    assert extract_changed_relevant_paths(commit, advisory_record) == set()
Example #9
def report_as_json(results: "list[Commit]", advisory_record: AdvisoryRecord):

    data = {
        "advisory_record": advisory_record.dict(),
        "commits": [r.dict() for r in results],
    }
    filename = "prospector-report.json"
    _logger.info("Writing results to " + filename)
    with open(filename, "w", encoding="utf8") as json_file:
        json.dump(data, json_file, ensure_ascii=True, indent=4)
    return filename


def advisory_record():
    return AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        repository_url="https://github.com/apache/struts",
        published_timestamp=1607532756,
        references=[
            "https://reference.to/some/commit/7532d2fb0d6081a12c2a48ec854a81a8b718be62"
        ],
        code_tokens=["AutomaticWorkQueueImpl"],
        paths=["pom.xml"],
    )
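Several snippets in this listing receive pytest fixtures (repository, paths, requests_mock) as parameters, and the bare advisory_record() function above reads like another such fixture whose @pytest.fixture decorator was dropped during extraction. A minimal sketch of that assumption follows (not the project's actual declaration; AdvisoryRecord is assumed to be imported from the project's data model, as in the snippets above):

import pytest


@pytest.fixture
def advisory_record():
    # Hypothetical re-registration of the function above as a pytest fixture;
    # pytest would then inject it into any test that declares an
    # "advisory_record" parameter.
    return AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        repository_url="https://github.com/apache/struts",
        paths=["pom.xml"],
    )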
Example #11
def test_extract_references_vuln_id():
    commit = Commit(
        commit_id="test_commit",
        repository="test_repository",
        cve_refs=[
            "test_advisory_record",
            "another_advisory_record",
            "yet_another_advisory_record",
        ],
    )
    advisory_record = AdvisoryRecord(vulnerability_id="test_advisory_record")
    result = extract_references_vuln_id(commit, advisory_record)
    assert result is True
Example #12
def test_extract_referred_to_by_pages_linked_from_advisories_wrong_url(repository):
    advisory_record = AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        references=["https://non-existing-url.com"],
    )

    commit = Commit(
        commit_id="r97993e3d78e1f5389b7b172ba9f308440830ce5",
        repository="test_repository",
    )
    assert not extract_referred_to_by_pages_linked_from_advisories(
        commit, advisory_record
    )
Example #13
def test_sub_path_matching(paths, sub_paths):
    commit = Commit(
        commit_id="test_commit", repository="test_repository", changed_files=paths
    )
    advisory_record = AdvisoryRecord(
        vulnerability_id="test_advisory_record", paths=sub_paths
    )

    matched_paths = {
        "fire-nation/zuko/lightning.png",
        "water-bending/katara/necklace.gif",
    }

    assert extract_changed_relevant_paths(commit, advisory_record) == matched_paths
Example #14
def test_report_generation():
    candidates = []
    for _ in range(100):
        annotated_candidates = Commit(
            commit_id=random_commit_hash(),
            repository=random_url(4),
            message=" ".join(random_list_of_strs(100)),
            timestamp=randint(0, 100000),
            hunks=random_list_of_hunks(1000, 42),
            diff=random_list_of_strs(200),
            changed_files=random_list_of_path(4, 42),
            message_reference_content=random_list_of_strs(42),
            jira_refs=random_list_of_jira_refs(42),
            ghissue_refs=random_list_of_github_issue_ids(100000, 42),
            cve_refs=random_list_of_cve(42),
            tags=random_list_of_strs(42),
            annotations=random_dict_of_strs(16, 10),
        )

        candidates.append(annotated_candidates)

    advisory = AdvisoryRecord(
        vulnerability_id=random_list_of_cve(max_count=1, min_count=1)[0],
        repository_url=random_url(4),
        published_timestamp=randint(0, 100000),
        last_modified_timestamp=randint(0, 100000),
        references=random_list_of_strs(42),
        references_content=random_list_of_strs(42),
        advisory_references=random_list_of_cve(42),
        affected_products=random_list_of_strs(42),
        description=" ".join(random_list_of_strs(42)),
        preprocessed_vulnerability_description=" ".join(
            random_list_of_strs(42)),
        relevant_tags=random_list_of_strs(42),
        versions=random_list_of_version(42, 4, 42),
        from_nvd=random_bool(),
        paths=random_list_of_path(4, 42),
        code_tokens=tuple(random_list_of_strs(42)),
    )

    filename = "test_report.html"
    if os.path.isfile(filename):
        os.remove(filename)
    generated_report = report_as_html(candidates,
                                      advisory,
                                      filename,
                                      statistics=sample_statistics())
    assert os.path.isfile(generated_report)
Example #15
def test_extract_referred_to_by_nvd(repository):
    advisory_record = AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        references=[
            "https://lists.apache.org/thread.html/r97993e3d78e1f5389b7b172ba9f308440830ce5f051ee62714a0aa34@%3Ccommits.struts.apache.org%3E",
            "https://other.com",
        ],
    )

    commit = Commit(
        commit_id="r97993e3d78e1f5389b7b172ba9f308440830ce5",
        repository="test_repository",
    )
    assert extract_referred_to_by_nvd(commit, advisory_record) == {
        "https://lists.apache.org/thread.html/r97993e3d78e1f5389b7b172ba9f308440830ce5f051ee62714a0aa34@%3Ccommits.struts.apache.org%3E",
    }

    commit = Commit(
        commit_id="f4d2eabd921cbd8808b9d923ee63d44538b4154f",
        repository="test_repository",
    )
    assert extract_referred_to_by_nvd(commit, advisory_record) == set()
Example #16
def test_is_commit_reachable_from_given_tag(repository):

    repo = repository
    commit = repo.get_commit("7532d2fb0d6081a12c2a48ec854a81a8b718be62")
    test_commit = preprocess_commit(commit)

    advisory_record = AdvisoryRecord(
        vulnerability_id="CVE-2020-26258",
        repository_url="https://github.com/apache/struts",
        paths=["pom.xml"],
        published_timestamp=1000000,
        versions=["STRUTS_2_1_3", "STRUTS_2_3_9"],
    )

    assert not is_commit_reachable_from_given_tag(
        test_commit, advisory_record, advisory_record.versions[0]
    )

    assert is_commit_reachable_from_given_tag(
        preprocess_commit(repo.get_commit("2e19fc6670a70c13c08a3ed0927abc7366308bb1")),
        advisory_record,
        advisory_record.versions[1],
    )
Example #17
def test_extract_path_similarities():
    code_tokens = [
        "TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato",
        "Bolin+Bumi+Ozai+Katara",
        "Jinora.Appa.Unalaq.Zaheer",
        "Naga.LinBeifong",
        "Sokka.Kya",
        "Bumi=Momo=Naga=Iroh",
        "Sokka_Unalaq",
        "Sokka.Iroh.Pabu",
        "LinBeifong=Zuko",
        "TenzinBolinSokka",
        "Korra-AsamiSato-Pabu-Iroh",
        "Mako.Naga",
        "Jinora=Bumi",
        "BolinAppaKuvira",
        "TophBeifongIroh",
        "Amon+Zuko+Unalaq",
    ]
    paths = [
        "Unalaq/Aang/Suyin Beifong",
        "Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer",
        "Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko",
        "Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi",
        "Momo",
        "Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq",
    ]
    commit = Commit(changed_files=paths)
    advisory = AdvisoryRecord(
        vulnerability_id=random_list_of_cve(max_count=1, min_count=1)[0],
        code_tokens=code_tokens,
    )
    similarities: pandas.DataFrame = extract_path_similarities(commit, advisory)
    expected = (
        ",changed file,code token,jaccard,sorensen-dice,otsuka-ochiai,levenshtein,damerau-levenshtein,length diff,inverted normalized levenshtein,inverted normalized damerau-levenshtein\n"
        "0,Unalaq/Aang/Suyin Beifong,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.09090909090909091,0.16666666666666666,0.17677669529663687,8,8,4,0.19999999999999996,0.19999999999999996\n"
        "1,Unalaq/Aang/Suyin Beifong,Bolin+Bumi+Ozai+Katara,0.0,0.0,0.0,4,4,0,0.6,0.6\n"
        "2,Unalaq/Aang/Suyin Beifong,Jinora.Appa.Unalaq.Zaheer,0.14285714285714285,0.25,0.25,4,4,0,0.6,0.6\n"
        "3,Unalaq/Aang/Suyin Beifong,Naga.LinBeifong,0.16666666666666666,0.2857142857142857,0.2886751345948129,3,3,1,0.7,0.7\n"
        "4,Unalaq/Aang/Suyin Beifong,Sokka.Kya,0.0,0.0,0.0,4,4,2,0.6,0.6\n"
        "5,Unalaq/Aang/Suyin Beifong,Bumi=Momo=Naga=Iroh,0.0,0.0,0.0,4,4,0,0.6,0.6\n"
        "6,Unalaq/Aang/Suyin Beifong,Sokka_Unalaq,0.2,0.3333333333333333,0.35355339059327373,4,4,2,0.6,0.6\n"
        "7,Unalaq/Aang/Suyin Beifong,Sokka.Iroh.Pabu,0.0,0.0,0.0,4,4,1,0.6,0.6\n"
        "8,Unalaq/Aang/Suyin Beifong,LinBeifong=Zuko,0.16666666666666666,0.2857142857142857,0.2886751345948129,4,4,1,0.6,0.6\n"
        "9,Unalaq/Aang/Suyin Beifong,TenzinBolinSokka,0.0,0.0,0.0,4,4,1,0.6,0.6\n"
        "10,Unalaq/Aang/Suyin Beifong,Korra-AsamiSato-Pabu-Iroh,0.0,0.0,0.0,5,5,1,0.5,0.5\n"
        "11,Unalaq/Aang/Suyin Beifong,Mako.Naga,0.0,0.0,0.0,4,4,2,0.6,0.6\n"
        "12,Unalaq/Aang/Suyin Beifong,Jinora=Bumi,0.0,0.0,0.0,4,4,2,0.6,0.6\n"
        "13,Unalaq/Aang/Suyin Beifong,BolinAppaKuvira,0.0,0.0,0.0,4,4,1,0.6,0.6\n"
        "14,Unalaq/Aang/Suyin Beifong,TophBeifongIroh,0.16666666666666666,0.2857142857142857,0.2886751345948129,4,4,1,0.6,0.6\n"
        "15,Unalaq/Aang/Suyin Beifong,Amon+Zuko+Unalaq,0.16666666666666666,0.2857142857142857,0.2886751345948129,4,4,1,0.6,0.6\n"
        "16,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.25,0.4,0.4008918628686366,8,8,0,0.19999999999999996,0.19999999999999996\n"
        "17,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Bolin+Bumi+Ozai+Katara,0.1,0.18181818181818182,0.1889822365046136,8,8,4,0.19999999999999996,0.19999999999999996\n"
        "18,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Jinora.Appa.Unalaq.Zaheer,0.1,0.18181818181818182,0.1889822365046136,7,7,4,0.30000000000000004,0.30000000000000004\n"
        "19,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Naga.LinBeifong,0.1111111111111111,0.2,0.2182178902359924,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "20,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Sokka.Kya,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "21,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Bumi=Momo=Naga=Iroh,0.1,0.18181818181818182,0.1889822365046136,8,8,4,0.19999999999999996,0.19999999999999996\n"
        "22,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Sokka_Unalaq,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "23,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Sokka.Iroh.Pabu,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "24,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,LinBeifong=Zuko,0.1111111111111111,0.2,0.2182178902359924,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "25,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,TenzinBolinSokka,0.1111111111111111,0.2,0.2182178902359924,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "26,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Korra-AsamiSato-Pabu-Iroh,0.2,0.3333333333333333,0.3380617018914066,6,6,3,0.4,0.4\n"
        "27,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Mako.Naga,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "28,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Jinora=Bumi,0.125,0.2222222222222222,0.2672612419124244,7,7,6,0.30000000000000004,0.30000000000000004\n"
        "29,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,BolinAppaKuvira,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "30,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,TophBeifongIroh,0.1111111111111111,0.2,0.2182178902359924,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "31,Tenzin/Asami Sato/Suyin Beifong/Tenzin/Bumi/Zaheer,Amon+Zuko+Unalaq,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "32,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.23076923076923078,0.375,0.375,8,8,0,0.19999999999999996,0.19999999999999996\n"
        "33,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Bolin+Bumi+Ozai+Katara,0.09090909090909091,0.16666666666666666,0.17677669529663687,7,7,4,0.30000000000000004,0.30000000000000004\n"
        "34,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Jinora.Appa.Unalaq.Zaheer,0.0,0.0,0.0,8,8,4,0.19999999999999996,0.19999999999999996\n"
        "35,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Naga.LinBeifong,0.1,0.18181818181818182,0.20412414523193154,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "36,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Sokka.Kya,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "37,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Bumi=Momo=Naga=Iroh,0.09090909090909091,0.16666666666666666,0.17677669529663687,7,7,4,0.30000000000000004,0.30000000000000004\n"
        "38,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Sokka_Unalaq,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "39,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Sokka.Iroh.Pabu,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "40,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,LinBeifong=Zuko,0.1,0.18181818181818182,0.20412414523193154,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "41,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,TenzinBolinSokka,0.1,0.18181818181818182,0.20412414523193154,7,7,5,0.30000000000000004,0.30000000000000004\n"
        "42,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Korra-AsamiSato-Pabu-Iroh,0.18181818181818182,0.3076923076923077,0.31622776601683794,7,7,3,0.30000000000000004,0.30000000000000004\n"
        "43,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Mako.Naga,0.1111111111111111,0.2,0.25,7,7,6,0.30000000000000004,0.30000000000000004\n"
        "44,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Jinora=Bumi,0.0,0.0,0.0,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "45,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,BolinAppaKuvira,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "46,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,TophBeifongIroh,0.0,0.0,0.0,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "47,Asami Sato/Tenzin/Tonraq/Katara/Tarrlok/Naga/Zuko,Amon+Zuko+Unalaq,0.1,0.18181818181818182,0.20412414523193154,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "48,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.3333333333333333,0.5,0.5,9,9,1,0.09999999999999998,0.09999999999999998\n"
        "49,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Bolin+Bumi+Ozai+Katara,0.2,0.3333333333333333,0.35355339059327373,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "50,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Jinora.Appa.Unalaq.Zaheer,0.0,0.0,0.0,9,9,5,0.09999999999999998,0.09999999999999998\n"
        "51,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Naga.LinBeifong,0.1,0.18181818181818182,0.20412414523193154,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "52,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Sokka.Kya,0.0,0.0,0.0,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "53,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Bumi=Momo=Naga=Iroh,0.09090909090909091,0.16666666666666666,0.17677669529663687,8,8,5,0.19999999999999996,0.19999999999999996\n"
        "54,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Sokka_Unalaq,0.0,0.0,0.0,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "55,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Sokka.Iroh.Pabu,0.0,0.0,0.0,9,9,6,0.09999999999999998,0.09999999999999998\n"
        "56,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,LinBeifong=Zuko,0.1,0.18181818181818182,0.20412414523193154,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "57,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,TenzinBolinSokka,0.1,0.18181818181818182,0.20412414523193154,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "58,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Korra-AsamiSato-Pabu-Iroh,0.18181818181818182,0.3076923076923077,0.31622776601683794,7,7,4,0.30000000000000004,0.30000000000000004\n"
        "59,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Mako.Naga,0.0,0.0,0.0,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "60,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Jinora=Bumi,0.1111111111111111,0.2,0.25,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "61,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,BolinAppaKuvira,0.2222222222222222,0.36363636363636365,0.4082482904638631,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "62,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,TophBeifongIroh,0.2222222222222222,0.36363636363636365,0.4082482904638631,7,7,6,0.30000000000000004,0.30000000000000004\n"
        "63,Amon/Asami Sato/Bumi/Kuvira/Toph Beifong/Bolin/Bumi,Amon+Zuko+Unalaq,0.1,0.18181818181818182,0.20412414523193154,8,8,6,0.19999999999999996,0.19999999999999996\n"
        "64,Momo,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.0,0.0,0.0,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "65,Momo,Bolin+Bumi+Ozai+Katara,0.0,0.0,0.0,4,4,3,0.6,0.6\n"
        "66,Momo,Jinora.Appa.Unalaq.Zaheer,0.0,0.0,0.0,4,4,3,0.6,0.6\n"
        "67,Momo,Naga.LinBeifong,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "68,Momo,Sokka.Kya,0.0,0.0,0.0,2,2,1,0.8,0.8\n"
        "69,Momo,Bumi=Momo=Naga=Iroh,0.25,0.4,0.5,3,3,3,0.7,0.7\n"
        "70,Momo,Sokka_Unalaq,0.0,0.0,0.0,2,2,1,0.8,0.8\n"
        "71,Momo,Sokka.Iroh.Pabu,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "72,Momo,LinBeifong=Zuko,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "73,Momo,TenzinBolinSokka,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "74,Momo,Korra-AsamiSato-Pabu-Iroh,0.0,0.0,0.0,5,5,4,0.5,0.5\n"
        "75,Momo,Mako.Naga,0.0,0.0,0.0,2,2,1,0.8,0.8\n"
        "76,Momo,Jinora=Bumi,0.0,0.0,0.0,2,2,1,0.8,0.8\n"
        "77,Momo,BolinAppaKuvira,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "78,Momo,TophBeifongIroh,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "79,Momo,Amon+Zuko+Unalaq,0.0,0.0,0.0,3,3,2,0.7,0.7\n"
        "80,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,TophBeifong_Zuko_IknikBlackstoneVarrick_AsamiSato,0.13333333333333333,0.23529411764705882,0.23570226039551587,9,9,2,0.09999999999999998,0.09999999999999998\n"
        "81,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Bolin+Bumi+Ozai+Katara,0.08333333333333333,0.15384615384615385,0.16666666666666666,9,9,6,0.09999999999999998,0.09999999999999998\n"
        "82,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Jinora.Appa.Unalaq.Zaheer,0.08333333333333333,0.15384615384615385,0.16666666666666666,10,10,6,0.0,0.0\n"
        "83,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Naga.LinBeifong,0.2,0.3333333333333333,0.3849001794597505,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "84,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Sokka.Kya,0.1,0.18181818181818182,0.23570226039551587,9,9,8,0.09999999999999998,0.09999999999999998\n"
        "85,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Bumi=Momo=Naga=Iroh,0.0,0.0,0.0,10,10,6,0.0,0.0\n"
        "86,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Sokka_Unalaq,0.2222222222222222,0.36363636363636365,0.47140452079103173,8,8,8,0.19999999999999996,0.19999999999999996\n"
        "87,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Sokka.Iroh.Pabu,0.09090909090909091,0.16666666666666666,0.19245008972987526,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "88,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,LinBeifong=Zuko,0.2,0.3333333333333333,0.3849001794597505,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "89,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,TenzinBolinSokka,0.2,0.3333333333333333,0.3849001794597505,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "90,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Korra-AsamiSato-Pabu-Iroh,0.07692307692307693,0.14285714285714285,0.14907119849998599,10,10,5,0.0,0.0\n"
        "91,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Mako.Naga,0.1,0.18181818181818182,0.23570226039551587,9,9,8,0.09999999999999998,0.09999999999999998\n"
        "92,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Jinora=Bumi,0.0,0.0,0.0,10,10,8,0.0,0.0\n"
        "93,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,BolinAppaKuvira,0.2,0.3333333333333333,0.3849001794597505,9,9,7,0.09999999999999998,0.09999999999999998\n"
        "94,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,TophBeifongIroh,0.2,0.3333333333333333,0.3849001794597505,8,8,7,0.19999999999999996,0.19999999999999996\n"
        "95,Kuvira/Bolin/Lin Beifong/Sokka/Mako/Korra/Toph Beifong/Unalaq,Amon+Zuko+Unalaq,0.09090909090909091,0.16666666666666666,0.19245008972987526,9,9,7,0.09999999999999998,0.09999999999999998\n"
    )

    assert similarities.to_csv() == expected


def test_advisory_basic():
    adv_rec = AdvisoryRecord(vulnerability_id="CVE-2015-5612",
                             repository_url="https://github.com/abc/xyz")

    assert adv_rec.repository_url == "https://github.com/abc/xyz"


def prospector(  # noqa: C901
    vulnerability_id: str,
    repository_url: str,
    publication_date: str = "",
    vuln_descr: str = "",
    tag_interval: str = "",
    version_interval: str = "",
    modified_files: "list[str]" = [],
    code_tokens: "list[str]" = [],
    time_limit_before: int = TIME_LIMIT_BEFORE,
    time_limit_after: int = TIME_LIMIT_AFTER,
    use_nvd: bool = False,
    nvd_rest_endpoint: str = "",
    backend_address: str = "",
    git_cache: str = GIT_CACHE,
    limit_candidates: int = MAX_CANDIDATES,
    active_rules: "list[str]" = ["ALL"],
    model_name: str = "",
) -> "list[Commit]":

    _logger.info("begin main commit and CVE processing")

    # -------------------------------------------------------------------------
    # advisory record extraction
    # -------------------------------------------------------------------------
    advisory_record = AdvisoryRecord(
        vulnerability_id=vulnerability_id,
        repository_url=repository_url,
        description=vuln_descr,
        from_nvd=use_nvd,
        nvd_rest_endpoint=nvd_rest_endpoint,
    )

    _logger.pretty_log(advisory_record)

    advisory_record.analyze(use_nvd=use_nvd)
    _logger.info(f"{advisory_record.code_tokens=}")

    if publication_date != "":
        advisory_record.published_timestamp = int(
            datetime.strptime(publication_date, r"%Y-%m-%dT%H:%M%z").timestamp()
        )

    if len(code_tokens) > 0:
        advisory_record.code_tokens += tuple(code_tokens)
        # drop duplicates
        advisory_record.code_tokens = list(set(advisory_record.code_tokens))

    # FIXME this should be handled better (or '' should not end up in the modified_files in
    # the first place)
    if modified_files != [""]:
        advisory_record.paths += modified_files

    _logger.info(f"{advisory_record.code_tokens=}")
    # print(advisory_record.paths)

    # -------------------------------------------------------------------------
    # retrieval of commit candidates
    # -------------------------------------------------------------------------
    with ExecutionTimer(
        core_statistics.sub_collection(name="retrieval of commit candidates")
    ):
        _logger.info(
            "Downloading repository {} into {}...".format(repository_url, git_cache)
        )
        repository = Git(repository_url, git_cache)
        repository.clone()
        tags = repository.get_tags()

        _logger.debug(f"Found tags: {tags}")

        _logger.info("Done retrieving %s" % repository_url)

        prev_tag = None
        following_tag = None
        if tag_interval != "":
            prev_tag, following_tag = tag_interval.split(":")
        elif version_interval != "":
            vuln_version, fixed_version = version_interval.split(":")
            prev_tag = get_tag_for_version(tags, vuln_version)[0]
            following_tag = get_tag_for_version(tags, fixed_version)[0]

        since = None
        until = None
        if advisory_record.published_timestamp:
            since = advisory_record.published_timestamp - time_limit_before
            until = advisory_record.published_timestamp + time_limit_after

        candidates = repository.get_commits(
            since=since,
            until=until,
            ancestors_of=following_tag,
            exclude_ancestors_of=prev_tag,
            filter_files="*.java",
        )

        _logger.info("Found %d candidates" % len(candidates))
    # if some code_tokens were found in the advisory text, require
    # that candidate commits touch some file whose path contains those tokens
    # NOTE: this works quite well for Java, not sure how general this criterion is

    # -------------------------------------------------------------------------
    # commit filtering
    #
    # Here we apply additional criteria to discard commits from the initial
    # set extracted from the repository
    # -------------------------------------------------------------------------
    # if advisory_record.code_tokens != []:
    #     _logger.info(
    #         "Detected tokens in advisory text, searching for files whose path contains those tokens"
    #     )
    #     _logger.info(advisory_record.code_tokens)

    # if modified_files == [""]:
    #     modified_files = advisory_record.code_tokens
    # else:
    #     modified_files.extend(advisory_record.code_tokens)

    # candidates = filter_by_changed_files(candidates, modified_files, repository)

    with ExecutionTimer(core_statistics.sub_collection(name="commit filtering")):
        candidates = filter_commits(candidates)

        _logger.debug(f"Collected {len(candidates)} candidates")

        if len(candidates) > limit_candidates:
            _logger.error(
                "Number of candidates exceeds %d, aborting." % limit_candidates
            )
            _logger.error(
                "Possible cause: the backend might be unreachable or otherwise unable to provide details about the advisory."
            )
            sys.exit(-1)

    # -------------------------------------------------------------------------
    # commit preprocessing
    # -------------------------------------------------------------------------

    with ExecutionTimer(
        core_statistics.sub_collection(name="commit preprocessing")
    ) as timer:
        raw_commit_data = dict()
        missing = []
        try:
            # Exploit the preprocessed commits already stored in the backend
            # and only process those that are missing.
            # Note: the endpoint does not exist (yet).
            r = requests.get(
                backend_address
                + "/commits/"
                + repository_url
                + "?commit_id="
                + ",".join(candidates)
            )
            _logger.info("The backend returned status '%d'" % r.status_code)
            if r.status_code != 200:
                _logger.error("This is weird...Continuing anyway.")
                missing = candidates
            else:
                raw_commit_data = r.json()
                _logger.info(
                    "Found {} preprocessed commits".format(len(raw_commit_data))
                )
        except requests.exceptions.ConnectionError:
            _logger.error(
                "Could not reach backend, is it running? The result of commit pre-processing will not be saved.",
                exc_info=log.config.level < logging.WARNING,
            )
            missing = candidates

        preprocessed_commits: "list[Commit]" = []
        for idx, commit in enumerate(raw_commit_data):
            # None results are not in the DB: collect them in the missing list,
            # since they need local preprocessing
            if commit:
                preprocessed_commits.append(Commit.parse_obj(commit))
            else:
                missing.append(candidates[idx])

        _logger.info("Preprocessing commits...")
        first_missing = len(preprocessed_commits)
        pbar = tqdm(missing)
        with Counter(
            timer.collection.sub_collection(name="commit preprocessing")
        ) as counter:
            counter.initialize("preprocessed commits", unit="commit")
            for commit_id in pbar:
                counter.increment("preprocessed commits")
                preprocessed_commits.append(
                    preprocess_commit(repository.get_commit(commit_id))
                )

        _logger.pretty_log(advisory_record)
        _logger.debug(f"preprocessed {len(preprocessed_commits)} commits")

        payload = [c.__dict__ for c in preprocessed_commits[first_missing:]]

    # -------------------------------------------------------------------------
    # save preprocessed commits to backend
    # -------------------------------------------------------------------------
    with ExecutionTimer(
        core_statistics.sub_collection(name="save preprocessed commits to backend")
    ):
        _logger.info("Sending preprocessing commits to backend...")
        try:
            r = requests.post(backend_address + "/commits/", json=payload)
            _logger.info(
                "Saving to backend completed (status code: %d)" % r.status_code
            )
        except requests.exceptions.ConnectionError:
            _logger.error(
                "Could not reach backend, is it running?"
                "The result of commit pre-processing will not be saved."
                "Continuing anyway.....",
                exc_info=log.config.level < logging.WARNING,
            )

    # TODO compute actual rank
    # This can be done by a POST request that creates a "search" job
    # whose inputs are the AdvisoryRecord, and the repository URL
    # The API returns immediately indicating a job id. From this
    # id, a URL can be constructed to poll the results asynchronously.
    # ranked_results = [repository.get_commit(c) for c in preprocessed_commits]

    # -------------------------------------------------------------------------
    # analyze candidates by applying rules and ML predictor
    # -------------------------------------------------------------------------

    with ExecutionTimer(
        core_statistics.sub_collection(name="analyze candidates")
    ) as timer:
        _logger.info("Extracting features from commits...")
        # annotated_candidates = []
        # with Counter(timer.collection.sub_collection("commit analysing")) as counter:
        #     counter.initialize("analyzed commits", unit="commit")
        #     # TODO remove "proactive" invocation of feature extraction
        #     for commit in tqdm(preprocessed_commits):
        #         counter.increment("analyzed commits")
        #         annotated_candidates.append(extract_features(commit, advisory_record))

        annotated_candidates = apply_rules(
            preprocessed_commits, advisory_record, active_rules=active_rules
        )
        annotated_candidates = rank(annotated_candidates, model_name=model_name)

    return annotated_candidates, advisory_record
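
For orientation, a minimal, hypothetical invocation of prospector() is sketched below. The CVE and repository URL are reused from the fixtures above, while the backend address is an illustrative placeholder; report_as_json is the reporting helper shown earlier in this listing.

if __name__ == "__main__":
    # Illustrative call only: the backend address is an assumption, not a value
    # taken from the project's configuration.
    ranked_commits, advisory = prospector(
        vulnerability_id="CVE-2020-26258",
        repository_url="https://github.com/apache/struts",
        use_nvd=True,
        backend_address="http://localhost:8000",
    )
    # Persist the ranked candidates together with the advisory record.
    report_as_json(ranked_commits, advisory)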