Exemplo n.º 1
0
def test():
    """Exercise the DataFrame Stats helpers on the bundled G-test CSV"""

    import os
    from bat.utils import file_utils

    # Load the sample dataset (path resolved relative to this file)
    csv_path = os.path.join(
        file_utils.relative_dir(__file__, 'test_data'), 'g_test_data.csv')
    frame = pd.read_csv(csv_path)
    print(frame.head())

    # Each report is a heading followed by one table computed on the
    # ('name', 'status') column pair
    reports = (
        ('\nContingency Table', contingency_table),
        ('\nJoint Distribution Table', joint_distribution),
        ('\nExpected Counts Table', expected_counts),
        ('\nG-Test Scores', g_test_scores),
    )
    for heading, stat_func in reports:
        print(heading)
        print(stat_func(frame, 'name', 'status'))
Exemplo n.º 2
0
def test():
    """Test for FileTailer Python Class

    Reads the sample http.log once with tail=False, then, when the optional
    interruptingcow package can be imported, tails the same file for two
    seconds and treats the resulting timeout as success.
    """

    # Grab a test file
    data_path = file_utils.relative_dir(__file__, '../../data')
    test_path = os.path.join(data_path, 'http.log')
    print('Opening Data File: {:s}'.format(test_path))

    # Create the Class
    tailer = FileTailer(test_path, tail=False)  # First with no tailing
    for line in tailer.readlines():
        print(line)
    print('Read with NoTail Test successful!')

    # Now include tailing (note: as an automated test this needs to timeout quickly)
    # Only the import itself is guarded, so an ImportError raised by the code
    # under test is not mistaken for the optional dependency being missing.
    try:
        from interruptingcow import timeout
    except ImportError:
        print('Tailing Test not run, need interruptingcow module...')
    else:
        # Spin up the class (tail argument left at its default)
        tailer = FileTailer(test_path)

        # Tail the file for 2 seconds and then quit
        try:
            with timeout(2, exception=RuntimeError):
                for line in tailer.readlines():
                    print(line)
        except RuntimeError:  # timeout() was asked to raise RuntimeError
            print('Tailing Test successful!')
Exemplo n.º 3
0
def test():
    """Run LogToDataFrame over a populated log and an empty one"""
    import os
    pd.set_option('display.width', 1000)
    from bat.utils import file_utils

    # Directory holding the sample logs
    data_path = file_utils.relative_dir(__file__, '../data')

    # http.log is the regular case; http_empty.log is a log with
    # header/close but no data rows
    for log_name in ('http.log', 'http_empty.log'):
        frame = LogToDataFrame(os.path.join(data_path, log_name))

        # Show the first rows, then the column datatypes
        print(frame.head())
        print(frame.dtypes)

    print('LogToDataFrame Test successful!')
Exemplo n.º 4
0
def test():
    """Test for BroLogReader Python Class

    Reads a set of sample logs without tailing, reads an empty log, pokes at
    make_dict with mismatched converters, checks that a missing file raises
    IOError, and finally (when the optional interruptingcow package can be
    imported) tails a log for two seconds.
    """
    import pytest

    # Grab a test file
    data_path = file_utils.relative_dir(__file__, '../data')

    # For each file, create the Class and test the reader
    files = [
        'app_stats.log', 'conn.log', 'dhcp.log', 'dns.log', 'files.log',
        'ftp.log', 'http.log', 'notice.log', 'smtp.log', 'ssl.log',
        'weird.log', 'x509.log'
    ]
    for bro_log in files:
        test_path = os.path.join(data_path, bro_log)
        print('Opening Data File: {:s}'.format(test_path))
        reader = BroLogReader(test_path, tail=False)  # First with no tailing
        for line in reader.readrows():
            print(line)
    print('Read with NoTail Test successful!')

    # Test an empty log (a log with header/close but no data rows)
    test_path = os.path.join(data_path, 'http_empty.log')
    reader = BroLogReader(test_path)
    for line in reader.readrows():
        print(line)

    # Test some of the error conditions: the second value cannot be parsed
    # by float(), so the second converter cannot succeed on it
    reader.field_names = ['good', 'error']
    reader.type_converters = [
        int, lambda x: datetime.datetime.fromtimestamp(float(x))
    ]
    reader.make_dict([5, '0, .5, .5'])

    # Test invalid file path
    with pytest.raises(IOError):
        BroLogReader('nowhere.log')

    # Now include tailing (note: as an automated test this needs to timeout quickly)
    # Only the import itself is guarded, so an ImportError raised by the code
    # under test is not mistaken for the optional dependency being missing.
    try:
        from interruptingcow import timeout
    except ImportError:
        print('Tailing Test not run, need interruptingcow module...')
    else:
        # Spin up the class (still pointing at the empty log)
        tailer = BroLogReader(test_path, tail=True)

        # Tail the file for 2 seconds and then quit
        try:
            with timeout(2, exception=RuntimeError):
                for line in tailer.readrows():
                    print(line)
        except RuntimeError:  # timeout() was asked to raise RuntimeError
            print('Tailing Test successful!')
Exemplo n.º 5
0
def test():
    """Exercise the DirWatcher Class by creating and removing a scratch file"""
    watched_dir = file_utils.relative_dir(__file__, '../../data')
    print('Watching Directory: %s' % watched_dir)
    DirWatcher(watched_dir, my_callback)

    # Touch an empty scratch file, pause for a second, then delete it again
    scratch_file = os.path.join(watched_dir, 'test.tmp')
    with open(scratch_file, 'w'):
        pass
    time.sleep(1)
    os.remove(scratch_file)
Exemplo n.º 6
0
def test():
    """Read conn.log through LiveSimulator with max_rows=10"""

    # Locate the sample connection log
    conn_log = os.path.join(
        file_utils.relative_dir(__file__, '../data'), 'conn.log')
    print('Opening Data File: {:s}'.format(conn_log))

    # Print every row the simulator hands back
    simulator = LiveSimulator(conn_log, max_rows=10)
    for row in simulator.readrows():
        print(row)
    print('Read with max_rows Test successful!')
Exemplo n.º 7
0
def test():
    """Test for methods in this file

    Converts dns.log to a DataFrame directly and via a parquet round trip,
    printing the head of each, then round-trips an empty log as well.
    """
    import os
    pd.set_option('display.width', 1000)
    from bat.dataframe_to_parquet import parquet_to_df
    from bat.log_to_dataframe import LogToDataFrame
    from bat.utils import file_utils
    import tempfile

    def _parquet_round_trip(source_log):
        """Write source_log to a temporary parquet file and read it back."""
        # Only the name is needed: close the handle right away instead of
        # leaving it to the garbage collector, since a still-open named
        # temporary file cannot be reopened by name on every platform.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            filename = temp_file.name
        try:
            log_to_parquet(source_log, filename)
            return parquet_to_df(filename)
        finally:
            # Remove the temp file even when the conversion fails
            os.remove(filename)

    # Grab a test file
    data_path = file_utils.relative_dir(__file__, '../data')
    log_path = os.path.join(data_path, 'dns.log')

    # Convert the log to a Pandas DataFrame
    log_to_df = LogToDataFrame()
    dns_df = log_to_df.create_dataframe(log_path)

    # Print out the head
    print(dns_df.head())

    # Write to a parquet file and read it back
    new_dns_df = _parquet_round_trip(log_path)

    # Print out the head
    print(new_dns_df.head())

    # Make sure our conversions didn't lose type info
    # Note: This is no longer going to work
    #       See: https://issues.apache.org/jira/browse/ARROW-5379
    # assert(dns_df.dtypes.values.tolist() == new_dns_df.dtypes.values.tolist())

    # Test an empty log (a log with header/close but no data rows)
    test_path = os.path.join(data_path, 'http_empty.log')
    _parquet_round_trip(test_path)

    print('DataFrame to Parquet Tests successful!')
Exemplo n.º 8
0
def test():
    """Run BroMultiLogReader over a gzipped log and two glob patterns"""
    from bat.utils import file_utils

    # Directory holding the sample logs
    data_path = file_utils.relative_dir(__file__, '../data')

    # One reader per file spec; every row it yields is printed
    file_specs = ('http.log.gz', 'dhcp*.log', 'dhcp*.log.gz')
    for file_spec in file_specs:
        spec_path = os.path.join(data_path, file_spec)
        print('Opening Data File: {:s}'.format(spec_path))
        multi_reader = BroMultiLogReader(spec_path)
        for row in multi_reader.readrows():
            print(row)
    print('Tests successful!')
Exemplo n.º 9
0
def test():
    """Test for methods in this file

    Converts dns.log to a DataFrame directly and via a parquet round trip,
    checks both have the same column dtypes, then round-trips an empty log.
    """
    import os
    pd.set_option('display.width', 1000)
    from bat.dataframe_to_parquet import parquet_to_df
    from bat.log_to_dataframe import LogToDataFrame
    from bat.utils import file_utils
    import tempfile

    def _parquet_round_trip(source_log):
        """Write source_log to a temporary parquet file and read it back."""
        # Only the name is needed: close the handle right away instead of
        # leaving it to the garbage collector, since a still-open named
        # temporary file cannot be reopened by name on every platform.
        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            filename = temp_file.name
        try:
            log_to_parquet(source_log, filename)
            return parquet_to_df(filename)
        finally:
            # Remove the temp file even when the conversion fails
            os.remove(filename)

    # Grab a test file
    data_path = file_utils.relative_dir(__file__, '../data')
    test_path = os.path.join(data_path, 'dns.log')

    # Convert the log to a Pandas DataFrame
    dns_df = LogToDataFrame(test_path)

    # Print out the head
    print(dns_df.head())

    # Write to a parquet file and read it back
    new_dns_df = _parquet_round_trip(test_path)

    # Print out the head
    print(new_dns_df.head())

    # The round trip must not change any column dtype
    assert (dns_df.dtypes.values.tolist() == new_dns_df.dtypes.values.tolist())

    # Test an empty log (a log with header/close but no data rows)
    test_path = os.path.join(data_path, 'http_empty.log')
    _parquet_round_trip(test_path)

    print('DataFrame to Parquet Tests successful!')
Exemplo n.º 10
0
def test():
    """Run LogToDataFrame.create_dataframe over the sample logs"""
    import os
    pd.set_option('display.width', 1000)
    from bat.utils import file_utils

    def _show(frame):
        """Print the first rows and then the column datatypes of frame."""
        print(frame.head())
        print(frame.dtypes)

    # Directory holding the sample logs
    data_path = file_utils.relative_dir(__file__, '../data')

    # A single converter instance is reused for every log below
    converter = LogToDataFrame()

    # Start with conn.log on its own
    _show(converter.create_dataframe(os.path.join(data_path, 'conn.log')))

    # Then a bunch of different log types
    log_names = [
        'app_stats.log', 'dns.log', 'http.log', 'notice.log', 'tor_ssl.log',
        'conn.log', 'dhcp_002.log', 'files.log', 'smtp.log', 'weird.log',
        'ftp.log', 'ssl.log', 'x509.log'
    ]
    for log_name in log_names:
        full_path = os.path.join(data_path, log_name)
        print('Testing: {:s}...'.format(full_path))
        _show(converter.create_dataframe(full_path))

    # Finally an empty log (a log with header/close but no data rows)
    _show(converter.create_dataframe(os.path.join(data_path, 'http_empty.log')))

    print('LogToDataFrame Test successful!')
Exemplo n.º 11
0
def test():
    """Test for methods in this file

    Converts dns.log to a DataFrame directly and via a parquet round trip,
    printing the head of each.
    """
    import os
    pd.set_option('display.width', 1000)
    from bat.dataframe_to_parquet import parquet_to_df
    from bat.log_to_dataframe import LogToDataFrame
    from bat.utils import file_utils
    import tempfile

    # Grab a test file
    data_path = file_utils.relative_dir(__file__, '../data')
    test_path = os.path.join(data_path, 'dns.log')

    # Convert the log to a Pandas DataFrame
    dns_df = LogToDataFrame(test_path)

    # Print out the head
    print(dns_df.head())

    # Create a temporary file. Only the name is needed: close the handle
    # right away instead of leaving it to the garbage collector, since a
    # still-open named temporary file cannot be reopened by name on every
    # platform.
    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
        filename = temp_file.name

    try:
        # Write to a parquet file
        log_to_parquet(test_path, filename)

        # Read from the parquet file
        new_dns_df = parquet_to_df(filename)
    finally:
        # Remove temp file, even when the conversion fails
        os.remove(filename)

    # Print out the head
    print(new_dns_df.head())

    # Make sure our conversions didn't lose type info
    # TODO: Uncomment this test when the following PR is fixed
    #       - TimeDelta Support: https://issues.apache.org/jira/browse/ARROW-835
    # assert(dns_df.dtypes.values.tolist() == new_dns_df.dtypes.values.tolist())

    print('DataFrame to Parquet Tests successful!')