Example #1
    def readrows(self):
        """Read rows from one or more files, 'combining' them into a single
           stream; gzipped (.gz) files are decompressed before reading.
        """

        # For each file (may be just one) create a BroLogReader and use it
        for self._filepath in self._files:

            # Check if the file is zipped; if so, decompress it to a temp file
            tmp = None
            if self._filepath.endswith('.gz'):
                tmp = tempfile.NamedTemporaryFile(delete=False)
                with gzip.open(self._filepath, 'rb') as f_in, open(tmp.name, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)

                # Set the file path to the new temp file
                self._filepath = tmp.name

            # Create a BroLogReader and yield its rows
            reader = bro_log_reader.BroLogReader(self._filepath)
            for row in reader.readrows():
                yield row

            # Clean up the temp file, if one was created
            if tmp is not None:
                try:
                    os.remove(tmp.name)
                    print('Removed temporary file {:s}...'.format(tmp.name))
                except OSError:
                    pass
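
A hedged usage sketch for the generator above; the wrapper class name and file list are illustrative assumptions, not from the original:

# Hypothetical class exposing self._files plus the readrows() above
reader = MultiLogReader(['http.log', 'http.log.gz'])  # class name assumed
for row in reader.readrows():
    print(row)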
Example #2
def parseLOG(filename):
    """ Generate a list of Dumont Requests from a bro log file

        Parameters
        ----------
        filename : string
            path to the bro .log file to parse.

        Returns
        -------
        result : list of DumontLog()
            ordered list of dumont logs.
        """
    DumontRequests = []

    bro_log = bro_log_reader.BroLogReader(filename)
    data = pd.DataFrame(bro_log.readrows())
    data['header_values'] = data['header_values'].apply(__parseHeaderValues__)

    for d in data.iterrows():
        if d[1]['method'] in ('GET', 'POST'):
            DumontRequests.append(DumontLog(d[1]))

    return aggregateTemporalFeatures(DumontRequests)
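
The __parseHeaderValues__ helper is not shown in this snippet; a purely illustrative sketch of what such a parser might do (the 'k1:v1,k2:v2' format and field semantics are assumptions, not from the original):

def __parseHeaderValues__(value):
    """Illustrative only: split a 'k1:v1,k2:v2' header string into a dict."""
    if not isinstance(value, str) or value in ('', '-'):
        return {}
    return dict(item.split(':', 1) for item in value.split(',') if ':' in item)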
Example #3
def bro_http_parser(inFile):
    reader = bro_log_reader.BroLogReader(inFile)
    dictionaryIndex = 1
    masterDictionary = {}
    for row in reader.readrows():

        # Re-key the row dictionary to provide legacy variable names
        row["idOrigHost"] = row.pop('id.orig_h')
        row["idOrigPort"] = row.pop('id.orig_p')
        row["idRespHost"] = row.pop('id.resp_h')
        row["idRespPort"] = row.pop('id.resp_p')
        row["transDepth"] = row.pop('trans_depth')
        row["userAgent"] = row.pop('user_agent')
        row["requestBodyLen"] = row.pop('request_body_len')
        row["responseBodyLen"] = row.pop('response_body_len')
        row["statusCode"] = row.pop('status_msg')
        row["epochTime"] = row.pop('ts')

        # Build the full URL; use https only when the responder port is 443
        if row["idRespPort"] == 443:
            fUrl = {"fullUrl": "https://" + row["host"] + row["uri"]}
        else:
            fUrl = {"fullUrl": "http://" + row["host"] + row["uri"]}
        row.update(fUrl)
        masterDictionary[dictionaryIndex] = row
        dictionaryIndex += 1

    return masterDictionary
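
As a design note, the chain of pop() calls above can be collapsed into a single rename map; a hedged, equivalent sketch:

# Equivalent to the pop() calls above: re-key the row via a mapping
RENAME = {'id.orig_h': 'idOrigHost', 'id.orig_p': 'idOrigPort',
          'id.resp_h': 'idRespHost', 'id.resp_p': 'idRespPort',
          'trans_depth': 'transDepth', 'user_agent': 'userAgent',
          'request_body_len': 'requestBodyLen',
          'response_body_len': 'responseBodyLen',
          'status_msg': 'statusCode', 'ts': 'epochTime'}
row = {RENAME.get(key, key): value for key, value in row.items()}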
Example #4
    def parseFile(self, filename, json=False):
        """ Creates a pandas dataframe from given brofile

            Parameters
            ----------
            filename : string
                Path to file to be parsed
            json : bool
                If True, parse the file as line-delimited JSON instead of a Bro log

            Returns
            -------
            result : pd.DataFrame
                Pandas dataframe containing bro log file
            """
        if not json:
            bro_log = bro_log_reader.BroLogReader(filename)
            df = pd.DataFrame(bro_log.readrows())
        else:
            df = pd.read_json(filename, lines=True)
        df['header_values'] = df['header_values'].apply(
            self.__parseHeaderValues__)
        return df
Example #5
def searcher():
    es = Elasticsearch("127.0.0.1:9200")
    reader = bro_log_reader.BroLogReader("http.log")
    l = []

    for row_dict in reader.readrows():  # Each result row is a dictionary
        print("\n")
        try:
            # Get the responder IP field; wrapped in try because not every row has it
            ip = row_dict["id.resp_h"]
        except KeyError:
            continue
        if ip not in l:  # Query only if the IP has not been queried yet
            l.append(ip)  # Remember it
            nyasro = obj.lookup_ip(ip)  # Talos lookup
            if "not fetch" not in str(nyasro):  # If the Talos lookup did not fail
                if nyasro["web_reputation"] == "Poor" or nyasro["email_reputation"] == "Poor":
                    nyasro["@timestamp"] = datetime.now().isoformat()
                    nyasro["destination_ip"] = row_dict["id.resp_h"]
                    nyasro["source_ip"] = row_dict["id.orig_h"]
                    nyasro["source_port"] = row_dict["id.orig_p"]
                    nyasro["destination_port"] = row_dict["id.resp_p"]
                    print(nyasro)

                    try:
                        es.create(index="threat-intel", doc_type="threat", body=nyasro)
                        print("success")
                    except Exception:
                        es.index(index="threat-intel", doc_type="threat", body=nyasro)
Example #6
def scan_files(c):
    s = SlackClient("api key here")
    c.h1("\n\nMalicious Files Downloaded")
    message = ""
    malicious_files_downloaded = []
    filename = ''
    reader = bro_log_reader.BroLogReader(
        "download.log")  # Reading from this log file

    for row in reader.readrows():  # Reading each row from the bro logs
        try:
            if row['method'] == 'GET':
                filename = row['host'] + row['uri']
                print(filename)
                if filename not in malicious_files_downloaded:
                    malicious_files_downloaded.append(filename)
                    try:
                        r = scan(filename)
                    except Exception:
                        continue
                    if r and r[1] > 0:  # If malicious
                        message = message + "A malicious file was downloaded {}\n".format(
                            filename)
                        break

        except KeyError:
            pass
    print("\n\nWriting to pdf")
    c.p(message)
    s.api_call("chat.postMessage", channel='project', text=message)
Example #7
    def bro_http_to_df(inFile):
        """Parses a Bro http.log file, returns a pandas data frame"""
        if not inFile.endswith('log'):
            print('This method only works with Bro http.log files; the file ' + inFile + ' is not valid.')
            sys.exit(1)

        reader = bro_log_reader.BroLogReader(inFile)
        bro_df = pd.DataFrame(reader.readrows())
        bro_df = broParse.add_full_URL(bro_df)
        bro_df = broParse.normalize_bro(bro_df)

        return bro_df
Example #8
    def parseFile(self, filename):
        """ Creates a pandas dataframe from given brofile

            Parameters
            ----------
            filename : string
                Path to file to be parsed

            Returns
            -------
            result : pd.DataFrame
                Pandas dataframe containing bro log file
            """
        bro_log = bro_log_reader.BroLogReader(filename)
        data = pd.DataFrame(bro_log.readrows())
        data['header_values'] = data['header_values'].apply(self.__parseHeaderValues__)
        return data
Example #9
    def __init__(self, filepath, eps=10, max_rows=None):
        """Initialization for the LiveSimulator Class
           Args:
               filepath (str): The Bro log file to read events from
               eps (int): Events per second at which the simulator emits events (default = 10)
               max_rows (int): The maximum number of rows to generate (default = None (go forever))
        """

        # Compute EPS timer
        # Logic:
        #     - Normal distribution centered around 1.0/eps
        #     - Make sure never less than 0
        #     - Precompute 1000 deltas and then just cycle around
        self.eps_timer = itertools.cycle([max(0, delta) for delta in np.random.normal(1.0/float(eps), .5/float(eps), size=1000)])

        # Initialize the Bro log reader
        self.log_reader = bro_log_reader.BroLogReader(filepath, tail=False)

        # Store max_rows
        self.max_rows = max_rows
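
The precomputed-and-cycled deltas only matter when something sleeps on them; a hedged sketch of an emit loop that could consume the timer (the method name is assumed, not shown in the original):

    # Hypothetical emit loop (illustrative; assumes `import time` at module level)
    def rows(self):
        """Yield rows, sleeping a jittered inter-event delay between them."""
        for count, row in enumerate(self.log_reader.readrows()):
            if self.max_rows is not None and count >= self.max_rows:
                break
            yield row
            time.sleep(next(self.eps_timer))  # roughly 1/eps seconds, with jitter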
Example #10
    def test_write_bro(self):
        # Extract Data
        self.exportObj.extractFromDB()
        # Reduce Threats
        self.prune_threats()
        # Write Test File
        self.exportObj.writeBro()
        # Set the file string name
        file_string = self.exportObj.fileString + '.bro'

        # Load the test file into a new expected format
        reader = bro_log_reader.BroLogReader(file_string)

        # Add each imported dictionary into a list
        full_list = []
        for row in reader.readrows():
            tmp_dict = dict()
            tmp_dict.update(row)
            full_list.append(tmp_dict)

        # Verify that rows were read back from the file
        self.assertNotEqual(full_list, [])
Example #11
def http_analyzer(c):
    msg = ''
    c.h1("\n\nHTTP_HUNT")
    es = Elasticsearch("localhost:9200")
    reader = bro_log_reader.BroLogReader(
        "http.log"
    )  # This object reads rows from the log at the given location
    url_list = []  # Hosts that have already been looked up
    i = 0
    my_dict = {}  # Will hold the JSON value

    for row_dict in reader.readrows():
        print("\n\n")
        try:
            # Sometimes the 'host' key may not be present
            if row_dict['host'] == '-':
                continue
        except KeyError:
            continue  # If the key is not present, go to the top of the loop

        if row_dict['host'] not in url_list:  # Only look up new hosts
            url_list.append(row_dict['host'])
            if i != 0 and i % 4 == 0:
                # The VirusTotal Public API is limited to 4 requests per minute
                print("[*]NEED A TIMEOUT \n")
                time.sleep(60)
            try:
                r = scan(row_dict['host'])
            except Exception:
                continue
            if r and r[1] > 0:  # If malicious
                print(r)
                print("a malicious site")
                my_dict["src_ip"] = row_dict["id.orig_h"]
                my_dict["src_port"] = row_dict["id.orig_p"]
                my_dict["dest_ip"] = row_dict["id.resp_h"]
                my_dict["dest_port"] = row_dict["id.resp_p"]
                my_dict["referrer"] = row_dict["referrer"]
                my_dict["method"] = row_dict["method"]
                my_dict["link"] = row_dict['host']
                my_dict["type"] = "HTTP"
                my_dict["@timestamp"] = datetime.now().isoformat()
                my_dict["ip_void"] = "http://www.ipvoid.com/scan/{}".format(
                    row_dict["id.resp_h"])
                my_dict["sender_base"] = "http://www.senderbase.org/lookup/?search_string={}".format(
                    row_dict["id.resp_h"])
                my_dict["virustotal"] = "https://www.virustotal.com/en/ip-address/{}/information/".format(
                    row_dict["id.resp_h"])
                my_dict["threat-intel-source"] = "Virustotal"
                msg = msg + "Source_ip: {}\nDestination_ip: {}\nSource_port: {}\nDestination_port: {}\nDomain:{}\n".format(
                    my_dict["src_ip"], my_dict["dest_ip"], my_dict["src_port"],
                    my_dict["dest_port"], my_dict["link"])
                body = json.dumps(my_dict)
                try:
                    es.create(index="malicious_website",
                              doc_type="practise",
                              body=body)
                    print("success")
                except Exception:
                    es.index(index="malicious_website",
                             doc_type="practise",
                             body=body)
                my_dict = {}
        i = i + 1
    c.p(msg)
Example #12
def detect(file, amountanom, realtime):
    """
    Function to apply a very simple anomaly detector.
    amountanom: The top number of anomalies we want to print
    realtime: If we want to read the conn.log file in real time (not working)
    """

    # Create a zeek reader on a given log file. Thanks brothon
    reader = bro_log_reader.BroLogReader(file, tail=realtime)
    # Create a Pandas dataframe from reader
    bro_df = pd.DataFrame(reader.readrows())

    # Add a label column in case a model can work in semi-supervised mode.
    # For now everything is 'normal', and we are not using this for detection.
    bro_df['label'] = 'normal'
    # Convert the duration timedelta to seconds. Scikit does not know how to work with timedeltas.
    bro_df['durationsec'] = bro_df.duration.apply(lambda x: x.total_seconds())
    # Replace missing values ('-') with -1. Even though this may bias the algorithms,
    # it is better than discarding those lines.
    bro_df['orig_bytes'] = bro_df['orig_bytes'].replace(to_replace='-',
                                                        value=-1)
    bro_df['resp_bytes'] = bro_df['resp_bytes'].replace(to_replace='-',
                                                        value=-1)
    bro_df['resp_pkts'] = bro_df['resp_pkts'].replace(to_replace='-', value=-1)
    bro_df['orig_ip_bytes'] = bro_df['orig_ip_bytes'].replace(to_replace='-',
                                                              value=-1)
    bro_df['resp_ip_bytes'] = bro_df['resp_ip_bytes'].replace(to_replace='-',
                                                              value=-1)

    # Add the columns from the log file that we know are numbers. This is only for conn.log files.
    X_train = bro_df[[
        'durationsec', 'orig_bytes', 'id.resp_p', 'resp_bytes',
        'orig_ip_bytes', 'resp_pkts', 'resp_ip_bytes'
    ]]
    # Our y is the label. But we are not using it now.
    y = bro_df.label

    # The X_test is where we are going to search for anomalies. In our case, it's the same data as X_train.
    X_test = X_train

    #################
    # Select a model from below

    # ABOD class for Angle-base Outlier Detection. For an observation, the variance of its weighted cosine scores to all neighbors could be viewed as the outlying score.
    #clf = ABOD()

    # LOF
    #clf = LOF()

    # CBLOF
    #clf = CBLOF()

    # LOCI
    #clf = LOCI()

    # LSCP
    #clf = LSCP()

    # MCD
    #clf = MCD()

    # OCSVM
    #clf = OCSVM()

    # PCA. Good and fast!
    clf = PCA()

    # SOD
    #clf = SOD()

    # SO_GAAL
    #clf = SO_GAAL()

    # SOS
    #clf = SOS()

    # XGBOD
    #clf = XGBOD()

    # KNN
    # Good results but slow
    #clf = KNN()
    #clf = KNN(n_neighbors=10)
    #################

    # Fit the model to the train data
    clf.fit(X_train)

    # get the prediction on the test data
    y_test_pred = clf.predict(X_test)  # outlier labels (0 or 1)
    y_test_scores = clf.decision_function(X_test)  # outlier scores

    # Convert the ndarrays of scores and predictions to  pandas series
    scores_series = pd.Series(y_test_scores)
    pred_series = pd.Series(y_test_pred)

    # Now use the series to add a new column to the X test
    X_test['score'] = scores_series.values
    X_test['pred'] = pred_series.values

    # Add the score to the bro_df also. So we can show it at the end
    bro_df['score'] = X_test['score']

    # Keep the positive predictions only. That is, keep only what we predict is an anomaly.
    X_test_predicted = X_test[X_test.pred == 1]

    # Keep the top X amount of anomalies
    top10 = X_test_predicted.sort_values(by='score',
                                         ascending=False).iloc[:amountanom]

    ## Print the results
    # Find the predicted anomalies in the original bro dataframe, where the rest of the data is
    df_to_print = bro_df.iloc[top10.index]
    print('\nFlows of the top anomalies')
    # Only print some columns, not all, so it's easier to read.
    df_to_print = df_to_print.drop([
        'conn_state', 'history', 'local_orig', 'local_resp', 'missed_bytes',
        'ts', 'tunnel_parents', 'uid', 'label'
    ],
                                   axis=1)
    print(df_to_print)
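
A minimal invocation sketch for the detector above; the log path and argument values are assumptions for illustration:

if __name__ == '__main__':
    # Print the top 10 anomalies from a conn.log, reading it once (no tailing)
    detect('conn.log', amountanom=10, realtime=False)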
Example #13
            ]
        elif 'dns' in args.bro_log:
            log_type = 'dns'
            features = [
                'Z', 'rejected', 'proto', 'query', 'qclass_name', 'qtype_name',
                'rcode_name', 'query_length', 'answer_length', 'entropy'
            ]
        else:
            print(
                'This example only works with Bro http.log or dns.log files.'
            )
            sys.exit(1)

        # Create a Bro IDS log reader
        print('Opening Data File: {:s}'.format(args.bro_log))
        reader = bro_log_reader.BroLogReader(args.bro_log)

        # Create a Pandas dataframe from reader
        bro_df = pd.DataFrame(reader.readrows())
        print('Read in {:d} Rows...'.format(len(bro_df)))

        # Using Pandas we can easily and efficiently compute additional data metrics
        # Here we use the vectorized operations of Pandas/Numpy to compute query length
        # We'll also compute entropy of the query
        if log_type == 'dns':
            bro_df['query_length'] = bro_df['query'].str.len()
            bro_df['answer_length'] = bro_df['answers'].str.len()
            bro_df['entropy'] = bro_df['query'].map(lambda x: entropy(x))

        # Use the BroThon DataframeToMatrix class
        to_matrix = dataframe_to_matrix.DataFrameToMatrix()
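
The entropy() helper called in the DNS branch above is not defined in this snippet; a minimal Shannon-entropy sketch, assuming character-level entropy in bits (illustrative, not necessarily the original implementation):

import math
from collections import Counter

def entropy(string):
    """Illustrative sketch: Shannon entropy (bits per character) of a string."""
    counts = Counter(string)
    total = float(len(string))
    return -sum((n / total) * math.log(n / total, 2) for n in counts.values())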
Example #14
import os
import sys
import argparse
from pprint import pprint

# Local imports
from brothon import bro_log_reader

if __name__ == '__main__':
    # Example to run the bro log reader on a given file

    # Collect args from the command line
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', '--bro-log', type=str, help='Specify a bro log to run BroLogReader test on')
    parser.add_argument('-t', '--tail', action='store_true', help='Turn on log tailing')
    args, commands = parser.parse_known_args()

    # Check for unknown args
    if commands:
        print('Unrecognized args: %s' % commands)
        sys.exit(1)

    # If no args just call help
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Run the bro reader on a given log file
        reader = bro_log_reader.BroLogReader(args.bro_log, tail=args.tail)
        for row in reader.readrows():
            pprint(row)
Example #15
#!/usr/bin/python
from brothon import bro_log_reader
from file import scan
from elasticsearch import Elasticsearch
import time
import json

reader = bro_log_reader.BroLogReader("dns.log")
dns_replier_list = []
dns_requested_url = []

my_dict = {}


def put_in_string(message, my_dict):
    message = message + "\n\nSource_IP: {}\nSource_port: {}\nDestination_ip: {}\nDestination_port: {}\nQuery: {}\n".format(
        my_dict["src_ip"], my_dict["src_port"], my_dict["dest_ip"],
        my_dict["dest_port"], my_dict["query"])
    return message


def search_dns(c):
    i = 0
    dns_replier_list = []
    dns_requested_url = []

    my_dict = {}

    message = ""
    es = Elasticsearch("localhost:9200")
    for row in reader.readrows():
Example #16
            print('more')
            print(json_response['permalink'])
        else:
            print('This file cannot be verified')

    except BaseException as e:
        print(e)


if __name__ == '__main__':
    """Run a VirusTotal Query on Extracted File Hashes"""

    index = 1

    try:
        # Run the bro reader on a given log file
        reader = bro_log_reader.BroLogReader('files.log')

        print('Examination result (positives / total number of scan engines)')

        # Use Sha1 hash algorithm
        for row in reader.readrows():
            print(index)
            index += 1

            if row['sha1'] != '-':
                checkVirus(row['sha1'])

    except BaseException as e:
        print(e)
Example #17
        parser.print_help()
        sys.exit(1)

    # Sanity check that this is a file log
    if not args.bro_log.endswith('files.log'):
        print('This example only works with Bro files.log files..')
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Create a VirusTotal Query Class
        vtq = vt_query.VTQuery()

        # Run the bro reader on a given log file
        reader = bro_log_reader.BroLogReader(args.bro_log, tail=True)
        for row in reader.readrows():
            file_sha = row.get('sha256', '-')  # Bro uses '-' for an empty field
            if file_sha == '-':
                file_sha = row.get('sha1', '-')  # Fall back to sha1
                if file_sha == '-':
                    print('Could not find a sha256 or a sha1 key! Skipping...')
                    continue

            # Make the query with either sha
            results = vtq.query_file(file_sha)
            if results.get('positives', 0) > 1:  # At least two hits
                pprint(results)