Example #1
def test_web_create_traceroute_recaptcha_ok(client, good_recaptcha, db,
                                            rabbitmq):
    with open("tests/data/traceroute/mtr_json_1.json") as f:
        raw = f.read()
    res = client.post("/new_traceroute",
                      data=dict(raw=raw),
                      follow_redirects=True)

    assert res.status_code == 200

    # Wait a bit to let enrichers complete their work.
    time.sleep(2)

    t = Traceroute.select()[0]

    assert t.parsed is True
    assert t.enriched is True
    assert t.last_seen is not None
    last_seen_1 = t.last_seen

    assert f"<h4>Traceroute ID {t.id}".encode() in res.data, res.data

    # Make sure we're including the meta tag that prevents traceroute
    # pages from being indexed.
    assert b'<meta name="robots" content="noindex">' in res.data

    # The content of res.data represents the web page that was generated
    # as soon as the traceroute was created, so it contains all the
    # elements of a work-in-progress traceroute. Even though at this point
    # the actual traceroute should already be fully enriched, the
    # information does not show up in the page yet; in the real user
    # experience the browser receives the enriched data via SocketIO and
    # updates the content using JavaScript.

    # Check that the enriched information doesn't show up in the web page.
    assert b"15169" not in res.data
    assert b"12874" not in res.data

    # Now, let's request the traceroute web page again. At this point,
    # fresh content is generated and somewhere in the HTML code there
    # should be the JSON that contains all the information about the newly
    # created traceroute. Also, the traceroute should NOT be marked with
    # the graphical component that identifies it as WIP.

    res = client.get(f"/t/{t.id}")
    assert res.status_code == 200
    assert t.to_json().encode() in res.data

    assert b'<div class="spinner-border spinner-border-sm" role="status" id="tr_status_wip">' not in res.data

    # Check that the last_seen attribute of the traceroute is more recent
    # than it was when the traceroute was created.
    t = Traceroute.select()[0]

    last_seen_2 = t.last_seen

    assert last_seen_2 > last_seen_1
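
The fixed time.sleep(2) above can make the test flaky on slow machines; a minimal sketch of a polling alternative, assuming only the Traceroute model already used by the test (the helper name is hypothetical):

import time

def wait_until_enriched(timeout=10.0, interval=0.1):
    # Poll the DB until the first traceroute is enriched, failing the
    # test if that doesn't happen within `timeout` seconds.
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        t = Traceroute.select()[0]
        if t.enriched:
            return t
        time.sleep(interval)
    raise AssertionError("traceroute not enriched within timeout")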
Example #2
def status():
    traceroute_id = request.args.get("id")
    try:
        traceroute = Traceroute.get(Traceroute.id == traceroute_id)
    except DoesNotExist:
        return jsonify({"status": "not found"})

    return jsonify({"status": traceroute.status})
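
A caller could use this endpoint to poll for progress; a minimal usage sketch with the Flask test client from Example #1 (the "/status" route path is an assumption based on the handler name):

import time

# Hypothetical polling loop against the status endpoint; "enriched"
# matches the status value asserted in Example #4.
while client.get(f"/status?id={t.id}").get_json()["status"] != "enriched":
    time.sleep(0.1)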
Example #3
def t(traceroute_id):
    try:
        traceroute = Traceroute.get(Traceroute.id == traceroute_id)
    except DoesNotExist:
        return render_template("traceroute.html", err_code=1)

    traceroute.last_seen = datetime.datetime.utcnow()
    traceroute.save()

    return render_template("traceroute.html", t=traceroute)
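
These handlers are presumably registered as Flask routes; a sketch of wiring that would match the URLs requested by the tests (the app object and exact rules are assumptions):

# Hypothetical registration matching the f"/t/{t.id}" URL requested
# in Example #1 and the "/status" endpoint of Example #2.
app.add_url_rule("/status", view_func=status)
app.add_url_rule("/t/<traceroute_id>", view_func=t)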
Example #4
def test_enricher_basic():
    with open("tests/data/traceroute/mtr_json_1.json") as f:
        raw = f.read()
    create_traceroute(raw)

    # Verify the calls to the function that's used to
    # retrieve IP information from external sources.

    # Please note: a call for hop 9 IP 216.239.50.241 should not
    # be performed because an entry for 216.239.32.0/19 should
    # be found while getting IPDBInfo for 216.239.51.9
    assert get_ip_info_from_external_sources_mock.call_args_list == [
        call(IPv4Address("89.97.200.190")),
        call(IPv4Address("62.101.124.17")),  # 62-101-124-17.fastres.net
        call(IPv4Address("209.85.168.64")),
        call(IPv4Address("216.239.51.9")),
        # call(IPv4Address("216.239.50.241")), <<< expected to be missing
        call(IPv4Address("8.8.8.8"))
    ]

    # Verify that the traceroute is marked as parsed
    # and enriched properly.

    t = Traceroute.select()[0]

    assert t.parsed is True
    assert t.enriched is True
    assert t.enrichment_started is not None
    assert t.enrichment_completed is not None
    assert t.enrichment_completed >= t.enrichment_started

    assert len(t.hops) == 10

    hop = t.get_hop_n(1)
    assert len(hop.hosts) == 1
    host = hop.hosts[0]
    assert host.original_host == "192.168.1.254"
    assert str(host.ip) == "192.168.1.254"
    assert host.name is None
    assert host.enriched is True
    assert len(host.origins) == 0

    hop = t.get_hop_n(6)
    assert len(hop.hosts) == 1
    host = hop.hosts[0]
    assert host.original_host == "62-101-124-17.fastres.net"
    assert str(host.ip) == "62.101.124.17"
    assert host.name == "62-101-124-17.fastres.net"
    assert host.enriched is True
    assert len(host.origins) == 1
    origin = host.origins[0]
    assert origin.asn == 12874
    assert origin.holder == "FASTWEB - Fastweb SpA"

    hop = t.get_hop_n(10)
    assert len(hop.hosts) == 1
    host = hop.hosts[0]
    assert host.original_host == "dns.google"
    assert str(host.ip) == "8.8.8.8"
    assert host.name == "dns.google"
    assert host.enriched is True
    assert len(host.origins) == 1
    origin = host.origins[0]
    assert origin.asn == 15169
    assert origin.holder == "GOOGLE"

    # Verify the to_dict of Traceroute.
    json_t = t.to_dict()

    assert json_t["enriched"] is True
    assert json_t["status"] == "enriched"
    assert len(json_t["hops"]) == 10

    json_host = json_t["hops"][1][0]
    assert json_host["ip"] == "192.168.1.254"
    assert json_host["ixp_network"] is None
    assert json_host["origins"] is None
    assert isinstance(json_host["avg_rtt"], float)
    assert isinstance(json_host["loss"], float)
    assert json_host["loss"] == 0

    json_host = json_t["hops"][10][0]
    assert json_host["ip"] == "8.8.8.8"
    assert json_host["ixp_network"] is None
    assert json_host["origins"] == [(15169, "GOOGLE")]
    assert isinstance(json_host["avg_rtt"], float)
    assert isinstance(json_host["loss"], float)
    assert json_host["name"] == "dns.google"

    # Just verify that the dict is serializable as JSON.
    t.to_json()
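
The hop-9 comment above relies on longest-prefix matching: once the entry for 216.239.32.0/19 is cached while looking up 216.239.51.9, any other IP inside that /19 no longer needs an external query. A minimal sketch of the idea, assuming the enricher caches entries in a radix tree as the later examples suggest (the payload string is illustrative):

import radix

rtree = radix.Radix()

# Cache the IPDBInfo obtained while enriching 216.239.51.9: it covers
# the whole 216.239.32.0/19 prefix.
node = rtree.add("216.239.32.0/19")
node.data["ip_db_info"] = "IPDBInfo for 216.239.32.0/19"

# A longest-prefix match for hop 9's IP hits the cached entry, so no
# call to the external sources is performed for it.
assert rtree.search_best("216.239.50.241").prefix == "216.239.32.0/19"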
Example #5
def stats():
    token = request.args.get("token", None)

    if not token:
        return "Token not provided"

    lookback_minutes = int(request.args.get("lookback", 60))

    cfg = load_config()

    if cfg["web"].get("stats_token", "") != token:
        return "Token not valid"

    LAST_PERIOD = datetime.timedelta(minutes=lookback_minutes)

    oldest_limit = datetime.datetime.utcnow() - LAST_PERIOD

    traceroutes = Traceroute.select().where(
        Traceroute.created > oldest_limit
    )

    time_to_complete_enrichment: List[float] = []
    time_to_start_enrichment: List[float] = []

    stats = Stats()

    for t in traceroutes:
        stats.total_cnt += 1

        if not t.parsed:
            stats.not_parsed_cnt += 1
            continue

        stats.parsed_cnt += 1

        if not t.enrichment_started:
            stats.enrichment_not_started_cnt += 1
            continue

        time_to_start_enrichment.append(
            (t.enrichment_started - t.created).total_seconds()
        )

        if not t.enrichment_completed:
            stats.enrichment_not_completed_cnt += 1
            continue

        if t.enriched:
            stats.enriched_cnt += 1

            time_to_complete_enrichment.append(
                (t.enrichment_completed - t.enrichment_started).total_seconds()
            )

    stats.avg_to_start_enrichment = 0
    if time_to_start_enrichment:
        stats.avg_to_start_enrichment = (
            sum(time_to_start_enrichment) / len(time_to_start_enrichment)
        )

    stats.avg_to_complete_enrichment = 0
    if time_to_complete_enrichment:
        stats.avg_to_complete_enrichment = (
            sum(time_to_complete_enrichment) / len(time_to_complete_enrichment)
        )

    res = """
Traceroutes:
- total:         {total_cnt}
- parsed:        {parsed_cnt}
- not parsed:    {not_parsed_cnt}
- enriched:      {enriched_cnt}
- not enriched:  {not_enriched_cnt} ({not_enriched_perc} %)
- not started:   {enrichment_not_started_cnt} ({enrichment_not_started_perc} %)
- not completed: {enrichment_not_completed_cnt} ({enrichment_not_completed_perc} %)
- avg time:
  - to start enrichment:    {avg_to_start_enrichment}
  - to complete enrichment: {avg_to_complete_enrichment}
""".format(**stats.as_dict())

    all_good = True

    for attr, default_value in THRESHOLDS:
        threshold = int(request.args.get(f"threshold_{attr}", default_value))
        value = getattr(stats, attr)

        if value > threshold:
            res += f"ERROR: the value of {attr} ({value}) is above the threshold ({threshold})\n"
            all_good = False

    if all_good:
        res += "ALL GOOD!\n"

    return Response(res, mimetype="text/plain")
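
Stats and THRESHOLDS are referenced but not defined in this snippet; a plausible sketch, with counter names taken from the code above (the derived-field formulas and the default thresholds are assumptions):

from dataclasses import dataclass, asdict

@dataclass
class Stats:
    total_cnt: int = 0
    parsed_cnt: int = 0
    not_parsed_cnt: int = 0
    enriched_cnt: int = 0
    enrichment_not_started_cnt: int = 0
    enrichment_not_completed_cnt: int = 0
    avg_to_start_enrichment: float = 0
    avg_to_complete_enrichment: float = 0

    def as_dict(self):
        d = asdict(self)
        # Derived fields used by the report template above
        # (formulas are guesses).
        d["not_enriched_cnt"] = self.parsed_cnt - self.enriched_cnt
        for attr in ("not_enriched", "enrichment_not_started",
                     "enrichment_not_completed"):
            cnt = d[f"{attr}_cnt"]
            d[f"{attr}_perc"] = (
                round(cnt / self.total_cnt * 100, 2) if self.total_cnt else 0
            )
        return d

# (attribute, default threshold) pairs; each default can be overridden
# via the threshold_<attr> query-string argument in stats() above.
THRESHOLDS = [
    ("not_enriched_perc", 10),
    ("enrichment_not_started_perc", 10),
    ("avg_to_complete_enrichment", 60),
]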
Example #6
def test_consumers_mysql_goes_away():
    """
    Create a traceroute and get it parsed and enriched
    using consumers, then shut MySQL down, spin it up
    again, and process another traceroute, to verify that
    consumers are actually able to reconnect to the DB
    server properly if it goes down and comes back.
    """

    assert len(CONSUMER_THREADS) == CONSUMER_THREADS_NUM

    with open("tests/data/traceroute/mtr_json_1.json") as f:
        raw = f.read()
    t1_id = create_traceroute(raw).id

    _wait_for_completion()

    # Compare the SocketIO records emitted by the enricher
    # with those that we expect to see.
    socketio_emitted_records = _get_socketio_emitted_records()

    expected_socketio_emitted_records = _prefix_traceroute_id(
        EXPECTED_SOCKETIO_EMIT_CALLS_TR1,
        t1_id
    )

    assert socketio_emitted_records == expected_socketio_emitted_records

    # Verify that the last call to SocketIO is the one
    # that notifies about the completion of the
    # enrichment process.
    t = Traceroute.get(Traceroute.id == t1_id)
    socketio_emit_mock.assert_called_with(
        SOCKET_IO_ENRICHMENT_COMPLETED_EVENT,
        {
            "traceroute_id": t1_id,
            "traceroute": t.to_dict(),
            "text": t.to_text()
        },
        namespace=f"/t/{t1_id}"
    )

    t = Traceroute.get(Traceroute.id == t1_id)

    assert t.parsed is True
    assert t.enriched is True

    MYSQL_CONTAINER.kill_existing_container()

    MYSQL_CONTAINER.ensure_is_up()
    MYSQL_CONTAINER.recreate_last_schema()

    with open("tests/data/traceroute/mtr_json_1.json") as f:
        raw = f.read()
    t2_id = create_traceroute(raw).id

    _wait_for_completion()

    # At this point, the records emitted via SocketIO
    # should be those originated while parsing the first
    # traceroute + those originated while parsing the
    # second one.
    socketio_emitted_records = _get_socketio_emitted_records()

    expected_socketio_emitted_records = \
        _prefix_traceroute_id(EXPECTED_SOCKETIO_EMIT_CALLS_TR1, t1_id) + \
        _prefix_traceroute_id(EXPECTED_SOCKETIO_EMIT_CALLS_TR1, t2_id)

    assert socketio_emitted_records == expected_socketio_emitted_records

    t = Traceroute.get(Traceroute.id == t2_id)

    assert t.parsed is True
    assert t.enriched is True

    assert len(CONSUMER_THREADS) == CONSUMER_THREADS_NUM
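
These tests lean on a few module-level helpers that are not shown here; a rough sketch of what two of them might look like, assuming the EXPECTED_SOCKETIO_EMIT_CALLS_* constants store (event, namespace_template) pairs (the record shape is a guess; _wait_for_completion could poll much like the sketch after Example #1):

def _get_socketio_emitted_records():
    # Hypothetical helper: flatten the mocked socketio.emit() calls
    # into comparable (event, namespace) tuples.
    return [
        (c.args[0], c.kwargs.get("namespace"))
        for c in socketio_emit_mock.call_args_list
    ]

def _prefix_traceroute_id(expected_records, traceroute_id):
    # Hypothetical helper: fill the traceroute ID into namespace
    # templates such as "/t/{}".
    return [
        (event, namespace_template.format(traceroute_id))
        for event, namespace_template in expected_records
    ]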
Example #7
def test_ixp_networks_updater_integration(ixp_networks):
    """
    Process a traceroute having some hops crossing an IXP network.
    """

    with open("tests/data/traceroute/mtr_json_2.json") as f:
        raw = f.read()
    t1_id = create_traceroute(raw).id

    _wait_for_completion()

    # Compare the SocketIO records emitted by the enricher
    # with those that we expect to see.
    socketio_emitted_records = _get_socketio_emitted_records()

    expected_socketio_emitted_records = _prefix_traceroute_id(
        EXPECTED_SOCKETIO_EMIT_CALLS_TR2,
        t1_id
    )

    assert socketio_emitted_records == expected_socketio_emitted_records

    # Verify that the last call to SocketIO is the one
    # that notifies about the completion of the
    # enrichment process.
    t = Traceroute.get(Traceroute.id == t1_id)
    socketio_emit_mock.assert_called_with(
        SOCKET_IO_ENRICHMENT_COMPLETED_EVENT,
        {
            "traceroute_id": t1_id,
            "traceroute": t.to_dict(),
            "text": t.to_text()
        },
        namespace=f"/t/{t1_id}"
    )

    t = Traceroute.get(Traceroute.id == t1_id)

    assert t.parsed is True
    assert t.enriched is True

    assert len(t.hops) == 8

    # Check that the host inside the IXP network is correct.
    hop = t.get_hop_n(7)
    assert len(hop.hosts) == 1
    host = hop.hosts[0]
    assert host.original_host == "217.29.66.1"
    assert str(host.ip) == "217.29.66.1"
    assert host.name == "mix-1.mix-it.net"
    assert host.enriched is True
    assert len(host.origins) == 0
    assert host.ixp_network is not None
    assert host.ixp_network.lan_name is None
    assert host.ixp_network.ix_name == "MIX-IT"
    assert host.ixp_network.ix_description == "Milan Internet eXchange"

    # Now, let's verify that all the enrichers from
    # the consumer threads got their IP info DB populated
    # equally. This is to ensure that the IXPNetworksUpdater
    # properly dispatches the IP info entries to all the
    # consumers.
    for thread in CONSUMER_THREADS:
        for enricher in thread.enrichers:
            ip_info_db = enricher.ip_info_db

            assert len(ip_info_db.nodes()) == 4

            assert sorted(ip_info_db.prefixes()) == sorted([
                "89.97.0.0/16",
                "93.62.0.0/15",
                "217.29.66.0/23",
                "217.29.72.0/21"
            ])

            assert ip_info_db.search_exact(
                "217.29.66.0/23"
            ).data["ip_db_info"] == IPDBInfo(
                prefix=ipaddress.ip_network("217.29.66.0/23"),
                origins=None,
                ixp_network=IXPNetwork(
                    lan_name=None,
                    ix_name="MIX-IT",
                    ix_description="Milan Internet eXchange"
                )
            )

    # Check now that the IP Info DB is populated properly.
    db_prefixes = IPInfo_Prefix.select()

    # Build a dict using DB records to make comparisons easier.
    db_prefixes_dict = {
        db_prefix.prefix: db_prefix.origins
        for db_prefix in db_prefixes
    }

    assert len(db_prefixes_dict.keys()) == 4

    assert sorted(db_prefixes_dict.keys()) == sorted([
        ipaddress.IPv4Network("89.97.0.0/16"),
        ipaddress.IPv4Network("93.62.0.0/15"),
        ipaddress.IPv4Network("217.29.66.0/23"),
        ipaddress.IPv4Network("217.29.72.0/21"),
    ])

    db_prefix = IPInfo_Prefix.get(prefix="217.29.66.0/23")
    assert db_prefix.to_ipdbinfo() == IPDBInfo(
        prefix=ipaddress.ip_network("217.29.66.0/23"),
        origins=None,
        ixp_network=IXPNetwork(
            lan_name=None,
            ix_name="MIX-IT",
            ix_description="Milan Internet eXchange"
        )
    )
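
IPDBInfo and IXPNetwork are compared by value above; a plausible sketch of their shape, with field names and types taken from the assertions (the dataclass base is an assumption):

from dataclasses import dataclass
from typing import List, Optional, Tuple
import ipaddress

@dataclass
class IXPNetwork:
    lan_name: Optional[str]
    ix_name: str
    ix_description: str

@dataclass
class IPDBInfo:
    # The prefix may be an IPv4Network or an IPv6Network.
    prefix: ipaddress.IPv4Network
    # (ASN, holder) tuples, or None for IXP LANs.
    origins: Optional[List[Tuple[int, str]]]
    ixp_network: Optional[IXPNetwork]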
Example #8
def test_consumers_basic():
    """
    Create a traceroute and get it parsed and enriched
    using consumers.
    """

    # Just to be sure that we're actually using the
    # number of threads we expect, in case the way
    # consumer threads are spun up gets changed while
    # doing some debugging.
    assert CONSUMER_THREADS_NUM > 1
    assert len(CONSUMER_THREADS) == CONSUMER_THREADS_NUM

    with open("tests/data/traceroute/mtr_json_1.json") as f:
        raw = f.read()
    t_id = create_traceroute(raw).id

    _wait_for_completion()

    # Compare the SocketIO records emitted by the enricher
    # with those that we expect to see.
    socketio_emitted_records = _get_socketio_emitted_records()

    expected_socketio_emitted_records = _prefix_traceroute_id(
        EXPECTED_SOCKETIO_EMIT_CALLS_TR1,
        t_id
    )

    assert socketio_emitted_records == expected_socketio_emitted_records

    # Verify that the last call to SocketIO is the one
    # that notifies about the completion of the
    # enrichment process.
    t = Traceroute.select()[0]
    socketio_emit_mock.assert_called_with(
        SOCKET_IO_ENRICHMENT_COMPLETED_EVENT,
        {
            "traceroute_id": t_id,
            "traceroute": t.to_dict(),
            "text": t.to_text()
        },
        namespace=f"/t/{t_id}"
    )

    # Let's check that the traceroute is in the expected
    # state, and that hops and hosts were processed.
    t = Traceroute.select()[0]

    assert t.parsed is True
    assert t.enriched is True

    assert len(t.hops) == 10

    hop = t.get_hop_n(1)
    assert len(hop.hosts) == 1
    host = hop.hosts[0]
    assert host.original_host == "192.168.1.254"
    assert str(host.ip) == "192.168.1.254"
    assert host.name is None
    assert host.enriched is True
    assert len(host.origins) == 0
    assert host.ixp_network is None

    hop = t.get_hop_n(6)
    assert len(hop.hosts) == 1
    host = hop.hosts[0]
    assert host.original_host == "62-101-124-17.fastres.net"
    assert str(host.ip) == "62.101.124.17"
    assert host.name == "62-101-124-17.fastres.net"
    assert host.enriched is True
    assert len(host.origins) == 1
    origin = host.origins[0]
    assert origin.asn == 12874
    assert origin.holder == "FASTWEB - Fastweb SpA"
    assert host.ixp_network is None

    hop = t.get_hop_n(10)
    assert len(hop.hosts) == 1
    host = hop.hosts[0]
    assert host.original_host == "dns.google"
    assert str(host.ip) == "8.8.8.8"
    assert host.name == "dns.google"
    assert host.enriched is True
    assert len(host.origins) == 1
    origin = host.origins[0]
    assert origin.asn == 15169
    assert origin.holder == "GOOGLE"
    assert host.ixp_network is None

    # Now, let's verify that all the enrichers from
    # the consumer threads got their IP info DB populated
    # equally. This is to ensure that the IP info records
    # are properly distributed across the consumers.
    for thread in CONSUMER_THREADS:
        for enricher in thread.enrichers:
            ip_info_db = enricher.ip_info_db

            assert len(ip_info_db.nodes()) == 5

            assert sorted(ip_info_db.prefixes()) == sorted([
                "89.97.0.0/16",
                "62.101.124.0/22",
                "209.85.128.0/17",
                "216.239.32.0/19",
                "8.8.8.0/24",
            ])

            assert ip_info_db.search_exact(
                "89.97.0.0/16"
            ).data["ip_db_info"] == IPDBInfo(
                prefix=ipaddress.ip_network("89.97.0.0/16"),
                origins=[
                    (12874, "FASTWEB - Fastweb SpA")
                ],
                ixp_network=None
            )

    # Check now that the IP Info DB is populated properly.
    db_prefixes = IPInfo_Prefix.select()

    # Build a dict using DB records to make comparisons easier.
    db_prefixes_dict = {
        db_prefix.prefix: db_prefix.origins
        for db_prefix in db_prefixes
    }

    assert len(db_prefixes_dict.keys()) == 5

    assert sorted(db_prefixes_dict.keys()) == sorted([
        ipaddress.IPv4Network("89.97.0.0/16"),
        ipaddress.IPv4Network("62.101.124.0/22"),
        ipaddress.IPv4Network("209.85.128.0/17"),
        ipaddress.IPv4Network("216.239.32.0/19"),
        ipaddress.IPv4Network("8.8.8.0/24")
    ])

    db_prefix = IPInfo_Prefix.get(prefix="89.97.0.0/16")
    assert db_prefix.to_ipdbinfo() == IPDBInfo(
        prefix=ipaddress.ip_network("89.97.0.0/16"),
        origins=[
            (12874, "FASTWEB - Fastweb SpA")
        ],
        ixp_network=None
    )

    # Verify that metrics logging is working properly.
    # To see which metrics have been collected:
    #   metrics_mock_wrapper.mm.print_records()

    mm_records = metrics_mock_wrapper.mm.get_records()

    # Expect 5 calls to the function that performs
    # external queries to fetch IP info; the counter
    # should be increased each time.
    mm_ip_info_from_external_sources = filter(
        lambda r: (
            r[0] == "incr" and
            r[1] == ("rich_traceroute.enrichers.enricher."
                     "ip_info_from_external_sources") and
            r[2] == 1
        ),
        mm_records
    )
    assert len(list(mm_ip_info_from_external_sources)) == 5

    # Check that we're keeping track of how long those
    # 5 upstream queries take to complete.
    mm_ripestat_query_time = filter(
        lambda r: (
            r[0] == "timing" and
            r[1] == ("rich_traceroute.enrichers.enricher."
                     "ripestat.query_time")
        ),
        mm_records
    )
    assert len(list(mm_ripestat_query_time)) == 5
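
The metrics assertions above index each record as (stat_type, key, value); a sketch of what metrics_mock_wrapper might wrap, assuming the markus library's MetricsMock (the wrapper class itself is hypothetical):

from markus.testing import MetricsMock

class MetricsMockWrapper:
    # Hypothetical fixture-scoped wrapper: keep a MetricsMock active
    # for the whole test session so tests can inspect records via
    # metrics_mock_wrapper.mm.get_records() afterwards.
    def __init__(self):
        self.mm = MetricsMock()
        self.mm.__enter__()

    def stop(self):
        self.mm.__exit__(None, None, None)

metrics_mock_wrapper = MetricsMockWrapper()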