Example no. 1
0
 def test_successful_login_reset(self):
     """A successful login arriving after failed attempts must reset
     FindBlockedIPs' failure counter, so nothing ends up blocked."""
     out_file = self.tmpdir.join("reset_output.txt")
     data_path = os.path.dirname(__file__) + '/../test/feature4_reset.txt'
     feature = FindBlockedIPs(gen_test_data(data_path), str(out_file))
     feature.parse()
     assert len(feature.blocked_logs) == 0
Example no. 2
0
 def test_additional_unsuccessful_logins_are_blocked(self):
     """Once the failed-login threshold is reached, FindBlockedIPs must
     block every further failed request that arrives."""
     out_file = self.tmpdir.join("immediate_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature4_immediate.txt')
     feature = FindBlockedIPs(gen_test_data(data_path), str(out_file))
     feature.parse()
     assert len(feature.blocked_logs) == 2
Example no. 3
0
 def test_k_larger_than_data(self):
     """Asking FindMostActive for more top hosts than exist must not
     raise; it simply emits every host it saw."""
     out_file = self.tmpdir.join("large_k_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature1_grouping.txt')
     feature = FindMostActive(gen_test_data(data_path), str(out_file), 10)
     feature.parse()
     expected = 'google.com,3\nbing.com,2\naskjeeves.com,1\n'
     assert out_file.read() == expected
 def test_k_larger_than_data_resources(self):
     """Tests that FindMostIntensiveResources does not throw errors when the k most active
     resources requested is larger than the total number of unique resources.

     NOTE: renamed from test_k_larger_than_data — the duplicate method name
     shadowed the FindMostActive variant defined just above, so pytest only
     ever collected one of the two tests.
     """
     f_output_large_k = self.tmpdir.join("large_k_output.txt")
     input_large_k = gen_test_data(
         str(os.path.dirname(__file__) + '/../test/feature2_grouping.txt'))
     f2_large_k = FindMostIntensiveResources(input_large_k,
                                             str(f_output_large_k), 10)
     f2_large_k.parse()
     assert f_output_large_k.read() == '/\n/coolstuff.gif\n/lamestuff.gif\n'
 def test_single_entry(self):
     """With exactly one log entry, the busiest traffic window reported
     by FindHighestTrafficWindows starts at that entry."""
     out_file = self.tmpdir.join("single_entry_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature3_single_entry.txt')
     feature = FindHighestTrafficWindows(gen_test_data(data_path),
                                         str(out_file))
     feature.parse()
     assert out_file.read() == "01/Jul/1995:00:00:01 -0400,1\n"
 def test_timezone(self):
     """FindHighestTrafficWindows must pick up the timezone offset from
     the input log entries."""
     out_file = self.tmpdir.join("single_entry_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature3_single_entry.txt')
     feature = FindHighestTrafficWindows(gen_test_data(data_path),
                                         str(out_file))
     feature.parse()
     assert feature.timezone == "-0400"
Example no. 7
0
 def test_failed_logins_depend_on_host(self):
     """Failed logins spread across different hosts must not add up to
     a block for any single host."""
     out_file = self.tmpdir.join("multiple_hosts_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature4_multiple_hosts.txt')
     feature = FindBlockedIPs(gen_test_data(data_path), str(out_file))
     feature.parse()
     assert len(feature.blocked_logs) == 0
 def test_dynamic_window(self):
     """The sliding-window size of FindHighestTrafficWindows must be
     configurable via minutes_per_bucket."""
     out_file = self.tmpdir.join("boundary_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature3_boundary.txt')
     feature = FindHighestTrafficWindows(gen_test_data(data_path),
                                         str(out_file),
                                         k=2,
                                         minutes_per_bucket=120)
     feature.parse()
     expected = ("01/Jul/1995:00:00:03 -0400,3\n"
                 "01/Jul/1995:00:00:04 -0400,2\n")
     assert out_file.read() == expected
Example no. 9
0
 def test_breaking_ties(self):
     """When two hosts tie on hit count, FindMostActive must order them
     lexicographically: bing.com and google.com both have three hits, so
     bing.com (b < g) comes first in the output file."""
     out_file = self.tmpdir.join("ties_output.txt")
     data_path = os.path.dirname(__file__) + '/../test/feature1_tie.txt'
     feature = FindMostActive(gen_test_data(data_path), str(out_file), 3)
     feature.parse()
     assert feature.hosts_to_hits["google.com"] == 3
     assert feature.hosts_to_hits["bing.com"] == 3
     assert out_file.read() == 'bing.com,3\ngoogle.com,3\n'
 def test_breaking_ties_resources(self):
     """Tests that FindMostIntensiveResources orders its output lexicographically when
     two resources have equal volume. In this case /coolstuff.gif and /lamestuff.gif both have
     total 2048, so /coolstuff.gif should come first as c < l.

     NOTE: renamed from test_breaking_ties — the duplicate method name
     shadowed the FindMostActive variant defined just above, so pytest only
     ever collected one of the two tests.
     """
     f_output_ties = self.tmpdir.join("ties_output.txt")
     input_ties = gen_test_data(
         str(os.path.dirname(__file__) + '/../test/feature2_tie.txt'))
     f2_ties = FindMostIntensiveResources(input_ties, str(f_output_ties), 3)
     f2_ties.parse()
     assert f2_ties.resources_to_bandwidth["/coolstuff.gif"] == 2048
     assert f2_ties.resources_to_bandwidth["/lamestuff.gif"] == 2048
     assert f_output_ties.read() == '/coolstuff.gif\n/lamestuff.gif\n'
 def test_in_between_values(self):
     """The busiest traffic windows may begin between logged events;
     FindHighestTrafficWindows must consider those start times too."""
     out_file = self.tmpdir.join("multi_entry_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature3_multi_entry.txt')
     feature = FindHighestTrafficWindows(gen_test_data(data_path),
                                         str(out_file))
     feature.parse()
     expected = ("01/Jul/1995:00:00:01 -0400,2\n"
                 "01/Jul/1995:00:00:02 -0400,1\n"
                 "01/Jul/1995:00:00:03 -0400,1\n"
                 "01/Jul/1995:00:00:04 -0400,1\n")
     assert out_file.read() == expected
Example no. 12
0
 def test_handles_gaps(self):
     """Bins with no traffic must appear as 0 in GetHostActivityLog's
     output rather than being skipped over."""
     out_file = self.tmpdir.join("gaps_output.txt")
     data_path = os.path.dirname(__file__) + '/../test/feature5_gaps.txt'
     feature = GetHostActivityLog(gen_test_data(data_path),
                                  str(out_file),
                                  host_to_search="199.72.81.55",
                                  minutes_per_bin=5)
     feature.parse()
     expected = ("01/Jul/1995:00:00:01,6\n"
                 "01/Jul/1995:00:05:01,1\n"
                 "01/Jul/1995:00:10:01,5\n"
                 "01/Jul/1995:00:15:01,0\n"
                 "01/Jul/1995:00:20:01,1\n")
     assert out_file.read() == expected
Example no. 13
0
 def test_grouping(self):
     """Six log lines from three unique hosts must be grouped into
     per-host hit counts by FindMostActive."""
     out_file = self.tmpdir.join("grouping_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature1_grouping.txt')
     feature = FindMostActive(gen_test_data(data_path), str(out_file), 3)
     feature.parse()
     assert feature.hosts_to_hits["google.com"] == 3
     assert feature.hosts_to_hits["bing.com"] == 2
     assert feature.hosts_to_hits["askjeeves.com"] == 1
     assert out_file.read() == 'google.com,3\nbing.com,2\naskjeeves.com,1\n'
Example no. 14
0
 def test_block_window_minutes(self):
     """The length of the post-threshold block period must be
     configurable via block_minutes."""
     out_file = self.tmpdir.join("configure_block_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature4_configure_block.txt')
     feature = FindBlockedIPs(gen_test_data(data_path),
                              str(out_file),
                              block_minutes=30)
     feature.parse()
     assert len(feature.blocked_logs) == 2
Example no. 15
0
 def test_failed_attempts_counter(self):
     """The number of failed attempts that triggers a block must be
     configurable via failed_attempts."""
     out_file = self.tmpdir.join("configure_fail_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature4_configure_fail.txt')
     feature = FindBlockedIPs(gen_test_data(data_path),
                              str(out_file),
                              failed_attempts=2)
     feature.parse()
     assert len(feature.blocked_logs) == 4
 def test_window_boundary(self):
     """Guard against off-by-one errors at the sliding-window edge:
     events exactly 60 minutes apart share a window, events 60 minutes
     and one second apart do not, and events a full day apart at the
     same clock time do not."""
     out_file = self.tmpdir.join("boundary_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature3_boundary.txt')
     feature = FindHighestTrafficWindows(gen_test_data(data_path),
                                         str(out_file),
                                         k=2)
     feature.parse()
     expected = ("01/Jul/1995:00:00:03 -0400,2\n"
                 "01/Jul/1995:00:00:04 -0400,2\n")
     assert out_file.read() == expected
Example no. 17
0
 def test_window_seconds(self):
     """The span scanned for consecutive failed login attempts must be
     configurable via window_seconds."""
     out_file = self.tmpdir.join("configure_window_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature4_configure_window.txt')
     feature = FindBlockedIPs(gen_test_data(data_path),
                              str(out_file),
                              window_seconds=30)
     feature.parse()
     assert len(feature.blocked_logs) == 1
 def test_limited_to_k(self):
     """With more candidate traffic windows than k, only the top k may
     be written to the output file."""
     out_file = self.tmpdir.join("multi_entry_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature3_multi_entry.txt')
     feature = FindHighestTrafficWindows(gen_test_data(data_path),
                                         str(out_file),
                                         k=2)
     feature.parse()
     expected = ("01/Jul/1995:00:00:01 -0400,2\n"
                 "01/Jul/1995:00:00:04 -0400,1\n")
     assert out_file.read() == expected
 def test_grouping_resources(self):
     """Tests that an input file with 6 server logs from 3 unique resources
     is correctly grouped by FindMostIntensiveResources.

     NOTE: renamed from test_grouping — the duplicate method name shadowed
     the FindMostActive variant defined earlier in the class, so pytest
     only ever collected one of the two tests.
     """
     f_output_grouping = self.tmpdir.join("grouping_output.txt")
     input_grouping = gen_test_data(
         str(os.path.dirname(__file__) + '/../test/feature2_grouping.txt'))
     f2_grouping = FindMostIntensiveResources(input_grouping,
                                              str(f_output_grouping), 3)
     f2_grouping.parse()
     assert f2_grouping.resources_to_bandwidth["/"] == 2048
     assert f2_grouping.resources_to_bandwidth["/coolstuff.gif"] == 256
     assert f2_grouping.resources_to_bandwidth["/lamestuff.gif"] == 112
     assert f_output_grouping.read(
     ) == '/\n/coolstuff.gif\n/lamestuff.gif\n'
Example no. 20
0
 def test_correct_assignment(self):
     """GetHostActivityLog must bin traffic for the requested host only,
     ignoring log lines from every other host."""
     out_file = self.tmpdir.join("assignment_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature5_assignment.txt')
     feature = GetHostActivityLog(gen_test_data(data_path),
                                  str(out_file),
                                  host_to_search="199.72.81.55",
                                  minutes_per_bin=5)
     feature.parse()
     expected = ("01/Jul/1995:00:00:01,6\n"
                 "01/Jul/1995:00:05:01,1\n"
                 "01/Jul/1995:00:10:01,5\n")
     assert out_file.read() == expected
Example no. 21
0
 def test_failed_requests_are_reconsidered(self):
     """A failed login that did not itself cause a block must remain
     eligible to start a later window of failures that does."""
     out_file = self.tmpdir.join("reconsider_output.txt")
     data_path = (os.path.dirname(__file__) +
                  '/../test/feature4_fail_reconsider.txt')
     feature = FindBlockedIPs(gen_test_data(data_path), str(out_file))
     feature.parse()
     assert len(feature.blocked_logs) == 1
     expected_ts = datetime.strptime("01/Jul/1995:00:04:05",
                                     '%d/%b/%Y:%H:%M:%S')
     assert feature.blocked_logs[0].timestamp == expected_ts
Example no. 22
0
def build_and_train():
    """Build the classifier model, train it on the generated training set
    with validation against the generated test set, and save the result.

    Side effect: writes the trained model to 'classifier.hd5' in the
    current working directory.
    """
    model = build_model()

    # generate training and test dataset
    training_set = gen_training_data.gen_training_data()
    test_set = gen_test_data.gen_test_data()

    # to view the training class indices:
    # training_set.class_indices

    # TRAINING THE CLASSIFIER
    # Model.fit accepts generators directly in modern Keras;
    # fit_generator is deprecated and was removed in TF 2.6+.
    model.fit(training_set,
              steps_per_epoch=4000,
              epochs=15,
              validation_data=test_set,
              validation_steps=1000)

    model.save('classifier.hd5')
Example no. 23
0
    def test_input_data_sort(self):
        """FindBlockedIPs must sort its input server logs by host, then
        ascending in time within each host."""
        out_file = self.tmpdir.join("sort_output.txt")
        data_path = os.path.dirname(__file__) + '/../test/feature4_sort.txt'
        feature = FindBlockedIPs(gen_test_data(data_path), str(out_file))
        feature.parse()

        # hosts come out in lexicographic order...
        assert feature.server_log[0].host == "bing.com"
        assert feature.server_log[1].host == "bing.com"
        assert feature.server_log[2].host == "google.com"
        assert feature.server_log[3].host == "google.com"

        # ...and within each host, events are ascending in time (the
        # fixture tags each event with a distinctive byte count so order
        # can be checked without parsing timestamps).
        assert feature.server_log[0].bytes == 56
        assert feature.server_log[1].bytes == 1024
        assert feature.server_log[2].bytes == 256
        assert feature.server_log[3].bytes == 1024
Example no. 24
0
    def test_blocked_requests_not_reconsidered(self):
        """Three failed logins landing inside an already-blocked period
        must not open a fresh block window of their own."""
        out_file = self.tmpdir.join("not_reconsider_output.txt")
        data_path = (os.path.dirname(__file__) +
                     '/../test/feature4_block_not_reconsider.txt')
        feature = FindBlockedIPs(gen_test_data(data_path), str(out_file))
        feature.parse()
        assert len(feature.blocked_logs) == 3

        # The final request falls outside the block window established by
        # the first three unsuccessful requests — but inside the window the
        # *blocked* unsuccessful requests would have opened — so it must
        # not be blocked.
        last_blocked = feature.blocked_logs[2].timestamp
        assert last_blocked == datetime.strptime("01/Jul/1995:00:04:05",
                                                 '%d/%b/%Y:%H:%M:%S')