Example #1
def find_blacklisted_ipvoid_mp(arglist):
    global CURR_DONE
    global TOTAL_TO_DO

    check_list, customer, result_type = arglist

    # Get destination ip and list of sources it connected to
    dst      = check_list[0]
    src_list = check_list[1]

    # Report progress
    with CURR_DONE_LOCK:
        CURR_DONE.value += 1
        local_curr_done = CURR_DONE.value
        if (local_curr_done % 10 == 0) or (local_curr_done == TOTAL_TO_DO.value):
            progress_bar(local_curr_done, TOTAL_TO_DO.value)

    response = ""
    try:
        response = urllib2.urlopen('http://www.ipvoid.com/scan/' + dst)
    except:
        return

    html = response.read().decode('utf-8')
    if 'BLACKLISTED' in html:
        line_splt = html.split('BLACKLISTED ')
        times_seen = int(line_splt[1].split('/')[0])
        for src in src_list:
            write_data([src, dst, times_seen], customer, result_type)
    response.close()
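
A quick note on the parsing above: the scraper looks for the literal text 'BLACKLISTED x/y' in the ipvoid.com report page and keeps x as the number of blacklist engines that flagged the address. A minimal offline sketch with a made-up HTML fragment (not a live ipvoid.com response):

# Hypothetical report fragment; the real page embeds 'BLACKLISTED <hits>/<engines>'.
sample_html = '<td>Blacklist Status</td><td>BLACKLISTED 7/96</td>'
if 'BLACKLISTED' in sample_html:
    times_seen = int(sample_html.split('BLACKLISTED ')[1].split('/')[0])
    print(times_seen)  # 7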
Example #2
def find_blacklisted_ipvoid_mp(arglist):
    global CURR_DONE
    global TOTAL_TO_DO

    check_list, customer, result_type = arglist

    # Get destination ip and list of sources it connected to
    dst = check_list[0]
    src_list = check_list[1]

    # Report progress
    with CURR_DONE_LOCK:
        CURR_DONE.value += 1
        local_curr_done = CURR_DONE.value
        if (local_curr_done % 10 == 0) or (local_curr_done
                                           == TOTAL_TO_DO.value):
            progress_bar(local_curr_done, TOTAL_TO_DO.value)

    response = ""
    try:
        response = urllib2.urlopen('http://www.ipvoid.com/scan/' + dst)
    except:
        return

    html = response.read().decode('utf-8')
    if 'BLACKLISTED' in html:
        line_splt = html.split('BLACKLISTED ')
        times_seen = int(line_splt[1].split('/')[0])
        for src in src_list:
            write_data([src, dst, times_seen], customer, result_type)
    response.close()
Example #3
def find_long_urls(customer, threshold, result_type):
    # searching for duration in log files, not results
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, URL]
    
    # restrict results to specified customer
    constraints = []
    
    # anything we want to filter out
    ignore = []

    scroll_id = ""

    scroll_len = 1000

    scrolling = True

    print(colors.bcolors.OKBLUE + '>>> Retrieving information from elasticsearch...')

    url_dict = {}

    count = 0
    error_count = 0

    while scrolling:

        # Retrieve data, which will come in sorted by longest entry for url field
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,fields, constraints, ignore, scroll_id, scroll_len)
              
        progress_bar(count, scroll_size)   
        for i in hits:
            count += 1

            try:
                url = i['fields'][URL][0]
                data = i['fields']
            except:
                error_count += 1
                continue

            key = len(url)

            # If key already exists, append the data, otherwise create new key with list that holds data
            if key in url_dict.keys():            
                url_dict[key].append(data)
            else:
                url_dict[key] = [data]

        

        if len(hits) < 1:
            scrolling = False

    # Get total number of keys (unique url lengths)
    total_keys = len(url_dict)

    # Verify that ES query actually returned some results
    if not total_keys == 0:
        print '>>> Finding the longest URLS... '
        final_res = []
        key_count = 0
        keys = sorted(url_dict.keys(), reverse=True)
        done = False

        # Get threshold amount of longest urls
        for url_length in keys:            
            if done == True:
                break
            for entry in url_dict[url_length]:
                if (key_count % 10 == 0) or (key_count == threshold):
                    progress_bar(key_count, threshold)
                key_count += 1
                if key_count > threshold:
                    done = True
                    break
                else:
                    final_res.append(entry)

        # WRITE THE DATA
        write_count = 0
        write_total = len(final_res)
        print '>>> Writing results of analysis...'
        for data in final_res:
            write_count += 1
            if (write_count % 10 == 0) or (write_count == write_total):        
                progress_bar(write_count, write_total)     
            write_data(data, customer, result_type)
            
            
    else:
        print (colors.bcolors.WARNING + '[!] Querying elasticsearch failed - Verify your log configuration file! [!]'+ colors.bcolors.ENDC)

    if error_count > 0:
        print (colors.bcolors.WARNING + '[!] ' + str(error_count) + ' log entries with misnamed or missing field values skipped! [!]'+ colors.bcolors.ENDC)
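
The selection step above buckets log entries by URL length and then walks the lengths in descending order until roughly threshold entries have been collected. A small self-contained sketch of that idea with toy URLs and an assumed threshold (in the module above each bucket holds the full 'fields' dict rather than the URL string):

url_list = ['http://a.example/x',
            'http://b.example/very/long/path?q=1',
            'http://c.example/']

url_dict = {}
for url in url_list:
    url_dict.setdefault(len(url), []).append(url)

threshold = 2
final_res = []
for url_length in sorted(url_dict, reverse=True):
    if len(final_res) >= threshold:
        break
    for entry in url_dict[url_length]:
        if len(final_res) >= threshold:
            break
        final_res.append(entry)

print(final_res)  # the two longest URLs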
Example #4
File: beaconing.py Project: xia0pin9/RITA
def beacon_analysis(customer, proto, result_type):

    global TOTAL_TO_DO
    global CURR_DONE
    global TIME_DICT
    CURR_DONE.value = 0
    worker_pool = Pool(processes=None, maxtasksperchild=1)

    # searching for beacons in log files, not results
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP, DESTINATION_PORT, PROTOCOL, TIMESTAMP]

    if proto != "":
        # restrict results to the specified protocol
        constraints = [{PROTOCOL: proto}]
    else:
        constraints = []

    # anything we want to filter out
    ignore = []

    scroll_id = ""
    scroll_len = 1000

    scrolling = True

    print(
        colors.bcolors.OKBLUE +
        '>>> Retrieving information from elasticsearch and building a dictionary... '
    )

    # start index for progress bar
    count = 0
    error_count = 0

    # Build a dictionary for beacon detection
    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(
            customer, doc_type, fields, constraints, ignore, scroll_id,
            scroll_len)

        for entry in hits:
            count += 1

            try:
                # create dictionary key
                key = (entry['fields'][SOURCE_IP][0],
                       entry['fields'][DESTINATION_IP][0],
                       entry['fields'][DESTINATION_PORT][0])

                # append timestamp to dictionary under unique key
                dt = dt_parser.parse(entry['fields'][TIMESTAMP][0])
                ts = time.mktime(dt.timetuple())
                TIME_DICT[key].append(int(ts))

            except:
                error_count += 1
                continue

        # Report progress
        progress_bar(count, scroll_size)

        # stop scrolling if no more hits
        if count == scroll_size:
            scrolling = False

    if not (len(TIME_DICT) == 0):
        # parallelize it
        m = Manager()
        db_queue = m.Queue()
        n_cores = multiprocessing.cpu_count()
        print('>>> Found ' + str(n_cores) + ' core(s)!')

        # create parameter list for threads and keys
        arglist = []
        for key in TIME_DICT:
            arglist.append((key, db_queue))

        # determine the total number of keys to be split up amongst threads
        TOTAL_TO_DO.value = len(arglist)

        # run the fft mapping
        print ">>> Running beacon analysis... "
        worker_pool.map(perform_fft_mp, iterable=arglist, chunksize=1000)

        # Write results to elasticsearch
        while not db_queue.empty():
            vals = []
            try:
                vals = db_queue.get()
                n_vals = len(list(vals))
            except:
                break

            write_data(vals, customer, proto, result_type)
    else:
        print(
            colors.bcolors.WARNING +
            '[!] Querying elasticsearch failed - Verify your log configuration file!'
            + colors.bcolors.ENDC)

    if error_count > 0:
        print(colors.bcolors.WARNING + '[!] ' + str(error_count) +
              ' log entries with misnamed or missing field values skipped!' +
              colors.bcolors.ENDC)
Example #5
File: beaconing.py Project: xia0pin9/RITA
def find_beacons_graph(customer, proto, category, save_dir):

    # Make directory to store graphs
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # reading previously stored beacon results, not raw log files
    doc_type = 'results'

    # fields to return from elasticsearch query
    fields = [
        SOURCE_IP, DESTINATION_IP, DESTINATION_PORT, 'min_hz', 'max_hz',
        TIMESTAMP
    ]

    # restrict results to the specified protocol and result category
    if proto != "":
        constraints = [{PROTOCOL: proto}, {'result_type': category}]
        proto_temp = proto
    else:
        constraints = [{'result_type': category}]
        proto_temp = "All Protocols"

    # anything we want to filter out
    ignore = []

    scroll_id = ""
    scroll_len = 1000

    scrolling = True

    print('>>> Retrieving information from elasticsearch...')

    # start index for results
    count = 0
    error_count = 0

    # Scroll through stored results and generate graphs
    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(
            customer, doc_type, fields, constraints, ignore, scroll_id,
            scroll_len)

        for entry in hits:

            count += 1
            progress_bar(count, scroll_size)

            try:
                src = entry['fields'][SOURCE_IP][0]
                dst = entry['fields'][DESTINATION_IP][0]
                dpt = entry['fields'][DESTINATION_PORT][0]
                min_hz = entry['fields']['min_hz'][0]
                max_hz = entry['fields']['max_hz'][0]
            except:
                error_count += 1
                continue

            times = get_datetimes(src, dst, dpt, customer, proto)

            if not len(times) > 10:
                return None

            span = times[-1] - times[0]
            if span > 0:
                n_times = len(times)

                for idx in range(1, n_times):
                    times[idx] = times[idx] - times[0]

                times[0] = 0

                n = scipy.zeros(times[-1] + 1)

                for time_idx in times:
                    n[time_idx] += 1

                fig = Figure()
                sub_fig = fig.add_subplot(111)

                span_6_hours = min([len(n), 21600])
                times_6_hours = n[:span_6_hours]

                #n, bins, patches = sub_fig.hist(times, span, normed=0,
                #                                histtype='step',
                #                                linestyle='dashed')

                sample_sz = len(n)
                k = scipy.arange(sample_sz)

                freq = k / float(sample_sz)
                freq = freq[:sample_sz // 2]

                Y = abs(np.fft.rfft(n) / sample_sz)

                Y = Y[:sample_sz // 2]
                zero_len = min([len(Y), 10])
                for idx in range(zero_len):
                    Y[idx] = 0

                curr_min_range = int((len(Y) / 0.5) * min_hz + 0.5)
                curr_max_range = int((len(Y) / 0.5) * max_hz + 0.5)
                Y = Y[curr_min_range:curr_max_range]
                freq = freq[curr_min_range:curr_max_range]

                canvas = FigureCanvas(fig)

                #P.setp(patches, 'facecolor', 'g', 'alpha', 0.75)
                sub_fig.plot(times_6_hours)
                sub_fig.set_title(category + ' (histogram)--Customer: ' +
                                  customer + '\nSrc: ' + src + ' Dest: ' +
                                  dst + ' Proto: ' + proto_temp +
                                  ' DstPort: ' + dpt)
                sub_fig.set_xlabel('Time Stamp (UNIT)')
                sub_fig.set_ylabel('Connection Attempts')
                P.gca().set_ylim(ymax=10)

                canvas.print_figure(save_dir + 'Src-' + src.replace('.', '_') +
                                    '_Dst-' + dst.replace('.', '_') + '_' +
                                    proto_temp + '_' + dpt + '_minhz-' +
                                    str(min_hz) + '_maxhz-' + str(max_hz) +
                                    '_' + customer + '_histb.png')
                P.close(fig)

                sub_fig.clear()

                fig = Figure()
                canvas = FigureCanvas(fig)
                sub_fig = fig.add_subplot(111)
                sub_fig.plot(freq, abs(Y), '--')
                sub_fig.set_title(category + ' (FFT)--Customer: ' + customer +
                                  '\nSrc: ' + src + ' Dest: ' + dst +
                                  ' Proto: ' + proto + ' DstPort: ' + dpt)
                sub_fig.set_xlabel('Freq (HZ)')
                sub_fig.set_ylabel('|Y(FREQ)|')
                canvas.print_figure(save_dir + 'Src-' + src.replace('.', '_') +
                                    '_Dst-' + dst.replace('.', '_') + '_' +
                                    proto + '_' + dpt + '_minhz-' +
                                    str(min_hz) + '_maxhz-' + str(max_hz) +
                                    '_' + customer + '_fft.png')
                P.close(fig)

        if len(hits) < 1:
            scrolling = False

    if error_count > 0:
        print(
            colors.bcolors.WARNING + '[!] ' + str(error_count) +
            ' results entries with misnamed or missing field values skipped!' +
            colors.bcolors.ENDC)

    print(colors.bcolors.OKGREEN + '[+] Finished generating graphs ' + '[+]' +
          colors.bcolors.ENDC)
Example #6
File: beaconing.py Project: xia0pin9/RITA
def perform_fft_mp(arglist):
    """
    Use fourier transform to look for beacons in a dataset specified in arg list
    make a table and mark those beacons in the database.
    """
    global CURR_DONE
    global UNLIKELY_CURR
    global TOTAL_TO_DO
    global TIME_DICT
    global CURR_DONE_LOCK

    key, db_queue = arglist

    # Mutex lock to update number of items completed so far
    with CURR_DONE_LOCK:
        CURR_DONE.value += 1
        local_curr_done = CURR_DONE.value

        # Draw a progress bar
        if (local_curr_done % 1000 == 0) or (local_curr_done
                                             == TOTAL_TO_DO.value):
            progress_bar(local_curr_done, TOTAL_TO_DO.value)

    src = key[0]  # Source IP
    dst = key[1]  # Destination IP
    dpt = key[2]  # Destination Port

    # Return if the sample size is too small
    if len(TIME_DICT[key]) < 10:
        return None

    # Sort the list of timestamps for this connection
    ts = sorted(TIME_DICT[key])

    # Make sure the last timestep is greater than the first
    if 0 < (ts[-1] - ts[0]):

        # Change the timestamp from seconds since the epoch to seconds since timestamp_0
        for idx in range(1, len(ts)):
            ts[idx] = ts[idx] - ts[0]
        ts[0] = 0

        # Create an array of seconds from 0 to the greatest timestep
        n = scipy.zeros(ts[-1])
        # For each timestamp, increment the count for that particular time in
        # the n array
        for time_idx in ts:
            n[time_idx - 1] = n[time_idx - 1] + 1

        sample_sz = len(n)

        # Create a range of numbers, 0 to the length of n
        k = scipy.arange(sample_sz)

        # Create a list of frequencies by dividing each element in k
        # by the length of k... ie k=1 -> freq=1/60
        freq = k / float(sample_sz)

        # Only look at the first half of the frequency range
        freq = freq[:sample_sz // 2]

        # Run Fast Fourier Transform on sample
        # Only look at positive frequencies from 0 to half the sample size
        Y = abs(np.fft.rfft(n) / sample_sz)
        Y = Y[:sample_sz // 2]

        # Get rid of high frequencies...
        zero_len = min([len(Y), 10])
        for idx in range(zero_len):
            Y[idx] = 0

        mar_vals = ()

        for mar_name, min_hz, max_hz in MAR_NAMES_LIST:

            if len(Y) <= 1:
                return None

            # Determine range of frequencies to examine
            curr_min_range = int((len(Y) / 0.5) * min_hz + 0.5)
            curr_max_range = int((len(Y) / 0.5) * max_hz + 0.5)
            tmp_Y = Y[curr_min_range:curr_max_range]

            if len(tmp_Y) <= 1:
                return None

            # Determine average and max value for frequencies in
            # the desired range
            fft_avg = np.mean(tmp_Y)
            y_max = np.amax(tmp_Y)

            if fft_avg <= 0:
                return None

            # Save max/average for the frequency range
            max_avg_ratio = y_max / fft_avg
            mar_vals += (max_avg_ratio, )

        ret_vals = (src, dst, dpt) + mar_vals
        db_queue.put(ret_vals)

    return None
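
The scoring idea in perform_fft_mp: bin each connection's timestamps into per-second counts, take the FFT, and compare the peak magnitude against the mean over a frequency band; strongly periodic (beacon-like) traffic produces a sharp spectral spike and therefore a large max/average ratio. A self-contained sketch of that idea on synthetic timestamps (numpy only; the names here are illustrative and not part of RITA):

import numpy as np

def max_avg_ratio(timestamps):
    # Bin timestamps into per-second counts and return max/mean of the FFT magnitude.
    ts = sorted(int(t) for t in timestamps)
    ts = [t - ts[0] for t in ts]                     # rebase to zero, as the code above does
    counts = np.zeros(ts[-1] + 1)
    for t in ts:
        counts[t] += 1                               # connection attempts per second
    spectrum = np.abs(np.fft.rfft(counts)) / len(counts)
    spectrum[:10] = 0                                # zero the lowest-frequency bins, mirroring Y[idx] = 0 above
    return spectrum.max() / spectrum.mean()

periodic = range(0, 3600, 30)                        # one connection every 30 seconds for an hour
random_ts = sorted(np.random.randint(0, 3600, 120))  # 120 connections at random times

print(max_avg_ratio(periodic))                       # large ratio: a sharp spike near 1/30 Hz
print(max_avg_ratio(random_ts))                      # much smaller ratio: energy is spread out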
Example #7
File: beaconing.py Project: naisanza/RITA
def beacon_analysis(customer, proto, result_type):

    global TOTAL_TO_DO
    global CURR_DONE
    global TIME_DICT
    CURR_DONE.value = 0
    worker_pool = Pool(processes=None, maxtasksperchild=1)


    # searching for beacons in log files, not results
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP, DESTINATION_PORT, PROTOCOL, TIMESTAMP]
    
    if proto != "":
        # restrict results to the specified protocol
        constraints = [{PROTOCOL:proto}]
    else:
        constraints = []
    
    # anything we want to filter out
    ignore = []

    scroll_id = ""
    scroll_len = 1000

    scrolling = True

    print(colors.bcolors.OKBLUE + '>>> Retrieving information from elasticsearch and building a dictionary... ')

    # start index for progress bar
    count = 0
    error_count = 0

    # Build a dictionary for beacon detection
    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,fields, constraints, ignore, scroll_id, scroll_len)

        for entry in hits:
            count += 1
            
            try:    
                # create dictionary key 
                key =  (entry['fields'][SOURCE_IP][0], 
                        entry['fields'][DESTINATION_IP][0],
                        entry['fields'][DESTINATION_PORT][0])

                # append timestamp to dictionary under unique key
                dt = dt_parser.parse(entry['fields'][TIMESTAMP][0])
                ts = time.mktime(dt.timetuple())
                TIME_DICT[key].append(int(ts))

            except:
                error_count += 1
                continue

        # Report progress
        progress_bar(count,scroll_size)

        # stop scrolling if no more hits
        if count == scroll_size:
            scrolling = False

    if not (len(TIME_DICT) == 0):
        # parallelize it
        m = Manager()
        db_queue = m.Queue()
        n_cores = multiprocessing.cpu_count()
        print('>>> Found ' + str(n_cores) + ' core(s)!')
        
        # create parameter list for threads and keys
        arglist = []
        for key in TIME_DICT:
            arglist.append((key, db_queue))

        # determine the total number of keys to be split up amongst threads
        TOTAL_TO_DO.value = len(arglist)

        # run the fft mapping
        print ">>> Running beacon analysis... "
        worker_pool.map(perform_fft_mp, iterable=arglist, chunksize=1000)
        
        # Write results to elasticsearch
        while not db_queue.empty():
            vals = []
            try:
                vals = db_queue.get()
                n_vals = len(list(vals))
            except:
                break                
            write_data(vals, customer, proto, result_type)
    else:
        print (colors.bcolors.WARNING + '[!] Querying elasticsearch failed - Verify your log configuration file!'+ colors.bcolors.ENDC)

    if error_count > 0:
        print (colors.bcolors.WARNING + '[!] ' + str(error_count) + ' log entries with misnamed or missing field values skipped!'+ colors.bcolors.ENDC)
Example #8
File: beaconing.py Project: naisanza/RITA
def find_beacons_graph(customer, proto, category, save_dir):

    # Make directory to store graphs
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)


    # reading previously stored beacon results, not raw log files
    doc_type = 'results'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP, DESTINATION_PORT, 'min_hz', 'max_hz', TIMESTAMP]
    
    # restrict results to the specified protocol and result category
    if proto != "":
        constraints = [{PROTOCOL:proto}, {'result_type':category}]
        proto_temp = proto
    else:
        constraints = [{'result_type':category}]
        proto_temp = "All Protocols"
    
    # anything we want to filter out
    ignore = []

    scroll_id = ""
    scroll_len = 1000

    scrolling = True

    print('>>> Retrieving information from elasticsearch...')

    # start index for results
    count = 0
    error_count = 0

    # Scroll through stored results and generate graphs
    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,fields, constraints, ignore, scroll_id, scroll_len)

        for entry in hits:

            count += 1
            progress_bar(count, scroll_size)
            
            try:
                src = entry['fields'][SOURCE_IP][0]
                dst = entry['fields'][DESTINATION_IP][0]
                dpt = entry['fields'][DESTINATION_PORT][0]
                min_hz = entry['fields']['min_hz'][0]
                max_hz = entry['fields']['max_hz'][0]
            except:
                error_count += 1
                continue


            times = get_datetimes(src, dst, dpt, customer, proto)

            if not len(times) > 10:
                return None

            span = times[-1] - times[0]
            if span > 0:
                n_times = len(times)

                for idx in range(1, n_times):
                    times[idx] = times[idx] - times[0]

                times[0] = 0
                
                n = scipy.zeros(times[-1] + 1)

                for time_idx in times:
                    n[time_idx] += 1

                fig = Figure()
                sub_fig = fig.add_subplot(111)

                span_6_hours = min([len(n), 21600])
                times_6_hours = n[:span_6_hours]            

                #n, bins, patches = sub_fig.hist(times, span, normed=0,
                #                                histtype='step',
                #                                linestyle='dashed')
                
                sample_sz = len(n)
                k = scipy.arange(sample_sz)

                freq = k/float(sample_sz)
                freq = freq[:sample_sz//2]

                Y = abs(np.fft.rfft(n)/sample_sz)

                Y = Y[:sample_sz//2]
                zero_len = min([len(Y), 10])
                for idx in range(zero_len):
                    Y[idx] = 0

                curr_min_range = int( (len(Y) / 0.5) * min_hz + 0.5)
                curr_max_range = int( (len(Y) / 0.5) * max_hz + 0.5)
                Y = Y[curr_min_range:curr_max_range]
                freq = freq[curr_min_range:curr_max_range]

                canvas = FigureCanvas(fig)
            
                #P.setp(patches, 'facecolor', 'g', 'alpha', 0.75)
                sub_fig.plot(times_6_hours)    
                sub_fig.set_title(category + ' (histogram)--Customer: '
                                  + customer+ '\nSrc: ' + src + ' Dest: ' + dst
                                  + ' Proto: ' + proto_temp + ' DstPort: ' + dpt)
                sub_fig.set_xlabel('Time Stamp (UNIT)')
                sub_fig.set_ylabel('Connection Attempts')
                P.gca().set_ylim(ymax=10)
                
                
                canvas.print_figure(save_dir + 'Src-'
                                    + src.replace('.', '_') + '_Dst-'
                                    + dst.replace('.', '_') + '_' + proto_temp 
                                    + '_' + dpt
                                    + '_minhz-' + str(min_hz)
                                    + '_maxhz-' + str(max_hz)
                                    + '_' + customer + '_histb.png')
                P.close(fig)

                sub_fig.clear()

                fig = Figure()
                canvas = FigureCanvas(fig)
                sub_fig = fig.add_subplot(111)
                sub_fig.plot(freq, abs(Y), '--')
                sub_fig.set_title(category +' (FFT)--Customer: ' + customer
                                  + '\nSrc: ' + src + ' Dest: ' + dst
                                  + ' Proto: ' + proto + ' DstPort: ' + dpt)
                sub_fig.set_xlabel('Freq (HZ)')
                sub_fig.set_ylabel('|Y(FREQ)|')
                canvas.print_figure(save_dir + 'Src-'
                                    + src.replace('.', '_') + '_Dst-'
                                    + dst.replace('.', '_') + '_' + proto
                                    + '_' + dpt
                                    + '_minhz-' + str(min_hz)
                                    + '_maxhz-' + str(max_hz)
                                    + '_' + customer+ '_fft.png')
                P.close(fig)

        

        if len(hits) < 1:
            scrolling = False

    if error_count > 0:
        print (colors.bcolors.WARNING + '[!] ' + str(error_count) + 
                ' results entries with misnamed or missing field values skipped!'+ colors.bcolors.ENDC)


    print(colors.bcolors.OKGREEN + '[+] Finished generating graphs '
         + '[+]' + colors.bcolors.ENDC)
Example #9
File: beaconing.py Project: naisanza/RITA
def perform_fft_mp(arglist):
    """
    Use fourier transform to look for beacons in a dataset specified in arg list
    make a table and mark those beacons in the database.
    """
    global CURR_DONE
    global UNLIKELY_CURR
    global TOTAL_TO_DO
    global TIME_DICT
    global CURR_DONE_LOCK

    key, db_queue = arglist

    # Mutex lock to update number of items completed so far
    with CURR_DONE_LOCK:
        CURR_DONE.value += 1
        local_curr_done = CURR_DONE.value

        # Draw a progress bar
        if (local_curr_done % 1000 == 0) or (local_curr_done == TOTAL_TO_DO.value):
            progress_bar(local_curr_done, TOTAL_TO_DO.value)
    

    src = key[0]  # Source IP
    dst = key[1]  # Destination IP
    dpt = key[2]  # Destination Port

    # Return if the sample size is too small
    if len(TIME_DICT[key]) < 10:
        return None

    # Sort the list of timestamps for this connection
    ts = sorted(TIME_DICT[key])
    
    # Make sure the last timestep is greater than the first
    if 0 < (ts[-1] - ts[0]):

        # Change the timestamp from seconds since the epoch to seconds since timestamp_0
        for idx in range(1, len(ts)):
            ts[idx] = ts[idx] - ts[0]
        ts[0] = 0
       
        # Create an array of seconds from 0 to the greatest timestep
        n = scipy.zeros(ts[-1])
        # For each timestamp, increment the count for that particular time in 
        # the n array
        for time_idx in ts:
            n[time_idx-1] = n[time_idx-1] + 1

        sample_sz = len(n)
 
        # Create a range of numbers, 0 to the length of n       
        k = scipy.arange(sample_sz)

        # Create a list of frequencies by dividing each element in k
        # by the length of k... ie k=1 -> freq=1/60
        freq = k/float(sample_sz)

        # Only look at the first half of the frequency range    
        freq = freq[:sample_sz//2]
        
        # Run Fast Fourier Transform on sample
        # Only look at positive frequencies from 0 to half the sample size            
        Y = abs(np.fft.rfft(n)/sample_sz)
        Y = Y[:sample_sz//2]
            
        # Get rid of high frequencies...
        zero_len = min([len(Y), 10])
        for idx in range(zero_len):
            Y[idx] = 0
        
        mar_vals = ()

        for mar_name, min_hz, max_hz in MAR_NAMES_LIST:

            if len(Y) <= 1:
                return None

            # Determine range of frequencies to examine
            curr_min_range = int( (len(Y) / 0.5) * min_hz + 0.5)
            curr_max_range = int( (len(Y) / 0.5) * max_hz + 0.5)
            tmp_Y = Y[curr_min_range:curr_max_range]

            if len(tmp_Y) <= 1:
                return None
        
            # Determine average and max value for frequencies in
            # the desired range
            fft_avg = np.mean(tmp_Y)
            y_max = np.amax(tmp_Y)

            if fft_avg <= 0:
                return None

            # Save max/average for the frequency range
            max_avg_ratio = y_max / fft_avg
            mar_vals += (max_avg_ratio,)

        ret_vals = (src, dst, dpt) + mar_vals
        db_queue.put( ret_vals )

    return None
Example #10
def find_cross_analysis(customer, result_type):
    # Search will be conducted over previously stored results
    doc_type = 'results'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, 'result_type']
    
    # restrict results to specified customer and eventId to list of possible IDs
    constraints = []
    
    # anything we want to filter out
    ignore = []

    sort = ""

    # create dictionary to store user login info
    crossref_dict = defaultdict(dict)

    scroll_id = ""

    scrolling = True

    scroll_len = 1000

    count = 0
    error_count = 0

    print(colors.bcolors.OKBLUE +'>>> Retrieving information from elasticsearch...')
    
    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type, fields, constraints, ignore, scroll_id, scroll_len, sort)

        # For every unique username (used as dict key), make a dictionary of event activity
        for entry in hits:
            try:
                src_list = entry['fields'][SOURCE_IP]
                behavior = entry['fields']['result_type'][0]

                if behavior not in CROSSREF_BEHAVIORS:
                    continue              
            except:
                error_count += 1
                continue

            for src in src_list:
                # If src has not been added to dictionary, add it
                if src not in crossref_dict:
                    crossref_dict[src] = []

                if behavior not in crossref_dict[src]:
                    crossref_dict[src].append(behavior)
            

        # stop scrolling if no more hits
        if len(hits) < 1:
            scrolling = False
        else:
            count += len(hits)
            # Report progress
            if (count % 10 == 0) or (count == scroll_size):
                progress_bar(count, scroll_size)

    crossref_dict_len = len(crossref_dict)
    if not (crossref_dict_len == 0):
        num_found = 0
        print('>>> Performing cross-analysis and writing results to elasticsearch... ')

        # Record all src ips with multiple behaviors
        count = 0
        for src in sorted(crossref_dict, key=lambda src: len(crossref_dict[src]), reverse=True):
            # Report progress
            count += 1
            progress_bar(count, crossref_dict_len)
            
            if len(crossref_dict[src]) > 1:
                num_found += 1
                write_data(src, crossref_dict[src], customer, result_type)

        print(colors.bcolors.WARNING + '[+] ' + str(num_found) + ' source IPs with multiple malicious behaviors found! [+]'+ colors.bcolors.ENDC)
    else:
        print (colors.bcolors.WARNING + '\nQuerying elasticsearch failed - Verify that you have run the other modules first!'+ colors.bcolors.ENDC)

    if error_count > 0:
        print (colors.bcolors.WARNING + '[!] ' + str(error_count) + ' log entries with misnamed or missing field values skipped! [!]'+ colors.bcolors.ENDC)
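
The cross-analysis above boils down to collecting the distinct result_type values seen for each source IP and flagging any IP that appears under more than one behavior. A toy sketch with made-up result rows and behavior names:

results = [('10.0.0.5', 'beaconing'),
           ('10.0.0.5', 'scanning'),
           ('10.0.0.6', 'beaconing')]

crossref_dict = {}
for src, behavior in results:
    crossref_dict.setdefault(src, [])
    if behavior not in crossref_dict[src]:
        crossref_dict[src].append(behavior)

for src in sorted(crossref_dict, key=lambda s: len(crossref_dict[s]), reverse=True):
    if len(crossref_dict[src]) > 1:
        print(src + ': ' + str(crossref_dict[src]))  # only 10.0.0.5 is flagged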
Example #11
File: long_urls.py Project: xia0pin9/RITA
def find_long_urls(customer, threshold, result_type):
    # searching for duration in log files, not results
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, URL]

    # restrict results to specified customer
    constraints = []

    # anything we want to filter out
    ignore = []

    scroll_id = ""

    scroll_len = 1000

    scrolling = True

    print(colors.bcolors.OKBLUE +
          '>>> Retrieving information from elasticsearch...')

    url_dict = {}

    count = 0
    error_count = 0

    while scrolling:

        # Retrieve data, which will come in sorted by longest entry for url field
        hits, scroll_id, scroll_size = ht_data.get_data(
            customer, doc_type, fields, constraints, ignore, scroll_id,
            scroll_len)

        progress_bar(count, scroll_size)
        for i in hits:
            count += 1

            try:
                url = i['fields'][URL][0]
                data = i['fields']
            except:
                error_count += 1
                continue

            key = len(url)

            # If key already exists, append the data, otherwise create new key with list that holds data
            if key in url_dict.keys():
                url_dict[key].append(data)
            else:
                url_dict[key] = [data]

        if len(hits) < 1:
            scrolling = False

    # Get total number of keys (unique url lengths)
    total_keys = len(url_dict)

    # Verify that ES query actually returned some results
    if not total_keys == 0:
        print '>>> Finding the longest URLS... '
        final_res = []
        key_count = 0
        keys = sorted(url_dict.keys(), reverse=True)
        done = False

        # Get threshold amount of longest urls
        for url_length in keys:
            if done == True:
                break
            for entry in url_dict[url_length]:
                if (key_count % 10 == 0) or (key_count == threshold):
                    progress_bar(key_count, threshold)
                key_count += 1
                if key_count > threshold:
                    done = True
                    break
                else:
                    final_res.append(entry)

        # WRITE THE DATA
        write_count = 0
        write_total = len(final_res)
        print '>>> Writing results of analysis...'
        for data in final_res:
            write_count += 1
            if (write_count % 10 == 0) or (write_count == write_total):
                progress_bar(write_count, write_total)
            write_data(data, customer, result_type)

    else:
        print(
            colors.bcolors.WARNING +
            '[!] Querying elasticsearch failed - Verify your log configuration file! [!]'
            + colors.bcolors.ENDC)

    if error_count > 0:
        print(
            colors.bcolors.WARNING + '[!] ' + str(error_count) +
            ' log entries with misnamed or missing field values skipped! [!]' +
            colors.bcolors.ENDC)
Example #12
def find_concurrent(customer, result_type):
    # Search will be conducted in log files
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [EVENT_ID, USER_NAME, SOURCE_IP, TIMESTAMP]

    # restrict results to specified customer and eventId to list of possible IDs
    constraints = []

    # anything we want to filter out
    ignore = []

    # Sort results by timestamp
    sort = TIMESTAMP + ':asc'

    # create dictionary to store user login info
    concurrent_dict = defaultdict(dict)

    scroll_id = ""

    scrolling = True

    scroll_len = 1000

    count = 0
    error_count = 0

    print(colors.bcolors.OKBLUE +
          '>>> Retrieving information from elasticsearch...')

    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(
            customer, doc_type, fields, constraints, ignore, scroll_id,
            scroll_len, sort)

        # For every unique username (used as dict key), make a dictionary of event activity
        for entry in hits:
            try:
                user = entry['fields'][USER_NAME][0]
                event = entry['fields'][EVENT_ID][0]
            except:
                error_count += 1
                continue

            # If user name has not been added to dictionary, add it and set login counts to 0
            if user not in concurrent_dict:
                concurrent_dict[user]['logged_on'] = False
                concurrent_dict[user]['concurrent'] = 0
                concurrent_dict[user]['max_concurrent'] = 0
                concurrent_dict[user]['src_list'] = []

            try:
                src = entry['fields'][SOURCE_IP][0]
            except:
                src = None

            # Add only unique source ips
            if src not in concurrent_dict[user]['src_list']:
                concurrent_dict[user]['src_list'].append(src)

            # If event id indicates a logon mark the user as such, and add to the concurrent count if
            # the user is already logged on
            if event == LOG_ON:
                if concurrent_dict[user]['logged_on'] == True:
                    concurrent_dict[user]['concurrent'] += 1
                    if concurrent_dict[user]['max_concurrent'] < concurrent_dict[user]['concurrent']:
                        concurrent_dict[user]['max_concurrent'] = concurrent_dict[user]['concurrent']
                else:
                    concurrent_dict[user]['logged_on'] = True

            # If the event id indicates a logoff, reduce the concurrent count and, if the concurrent count is
            # now zero, mark the user as logged off
            elif (event == LOG_OFF) or (event == LOG_OFF2):
                if 0 < concurrent_dict[user]['concurrent']:
                    concurrent_dict[user]['concurrent'] -= 1
                if concurrent_dict[user]['concurrent'] == 0:
                    concurrent_dict[user]['logged_on'] = False

        # stop scrolling if no more hits
        if len(hits) < 1:
            scrolling = False
        else:
            count += len(hits)
            # Report progress
            if (count % 10 == 0) or (count == scroll_size):
                progress_bar(count, scroll_size)

    if not (len(concurrent_dict) == 0):
        num_found = 0
        print(
            '>>> Checking for concurrent logins and writing results to elasticsearch... '
            + colors.bcolors.ENDC)

        # record all users with concurrent logins
        for user, data in concurrent_dict.iteritems():
            if data['max_concurrent'] > 0:
                num_found += 1
                write_data(user, data, customer, result_type)

        print(colors.bcolors.WARNING + '[+] ' + str(num_found) +
              ' concurrent logins found! [+]' + colors.bcolors.ENDC)
    else:
        print(
            colors.bcolors.WARNING +
            '\nQuerying elasticsearch failed - Verify your log configuration file!'
            + colors.bcolors.ENDC)

    if error_count > 0:
        print(
            colors.bcolors.WARNING + '[!] ' + str(error_count) +
            ' log entries with misnamed or missing field values skipped! [!]' +
            colors.bcolors.ENDC)
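
At its core, find_concurrent runs a small per-user state machine: a logon event while the user is already marked as logged on raises the concurrent counter (and possibly its maximum), and logoff events lower it. A tiny synthetic walk-through, with placeholder values standing in for the LOG_ON / LOG_OFF constants used above:

LOG_ON, LOG_OFF = 'logon', 'logoff'            # placeholder event IDs for illustration only
events = [LOG_ON, LOG_ON, LOG_OFF, LOG_OFF]    # two overlapping sessions for one user

state = {'logged_on': False, 'concurrent': 0, 'max_concurrent': 0}
for event in events:
    if event == LOG_ON:
        if state['logged_on']:
            state['concurrent'] += 1
            state['max_concurrent'] = max(state['max_concurrent'], state['concurrent'])
        else:
            state['logged_on'] = True
    elif event == LOG_OFF:
        if state['concurrent'] > 0:
            state['concurrent'] -= 1
        if state['concurrent'] == 0:
            state['logged_on'] = False

print(state['max_concurrent'])                 # 1 -> this user had overlapping logins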
Example #13
def find_blacklisted_ipvoid(customer, result_type):
    global CURR_DONE
    global TOTAL_TO_DO

    CURR_DONE.value = 0

    # Analysis will be done on log files, not results
    doc_type = 'logs'

    # restrict results to specified customer
    constraints = []
    
    # anything we want to filter out
    ignore = []

    print(colors.bcolors.OKBLUE + '>>> Retrieving information from elasticsearch...')
    
    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP]


    scroll_id = ""
    scroll_len = 1000

    scrolling = True

    count = 0
    error_count = 0

    # build dictionary for blacklist detection
    blacklist_dict = defaultdict(list)

    while scrolling:

        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type, fields, constraints, ignore, scroll_id, scroll_len)
        

        # For every unique destination ip (used as dict key), make a list of all
        # src ips that connect to it
        for entry in hits:
            count += 1
            try:
                dst =  entry['fields'][DESTINATION_IP][0]
                src =  entry['fields'][SOURCE_IP][0]
            except:
                error_count += 1
                continue

            # Verify that source IP is internal and that destination ip is external
            if len(dst) != 0 and len(src) != 0:
                if (filter_ip(src) == False) and (filter_ip(dst) == True):
                    # Check for duplicate source IPs
                    try:
                        if src not in blacklist_dict[dst]:
                            blacklist_dict[dst].append(src)
                    except:
                        continue

        if len(hits) < 1:
            scrolling = False
        else:
            progress_bar(count, scroll_size)

    # Get total number of keys (unique destination IPs)
    total_keys = len(blacklist_dict)
    
    # Verify that ES query actually returned some results
    if not total_keys == 0:
        print('>>> Querying blacklist....')

        # Get the multiprocessing stuff ready
        TOTAL_TO_DO.value = len(blacklist_dict)
        workers = Pool(64)

        # create parameter list for threads and keys
        arglist = [(entry, customer, result_type) for entry in blacklist_dict.items()]
     
        # workers.map(find_blacklisted_ipvoid_mp, blacklist_dict.items())
        workers.map(find_blacklisted_ipvoid_mp, arglist)
    else:
        print (colors.bcolors.WARNING + '[!] Querying elasticsearch failed - Verify your log configuration file! [!]'+ colors.bcolors.ENDC)

    if error_count > 0:
        print (colors.bcolors.WARNING + '[!] ' + str(error_count) + ' log entries with misnamed or missing field values skipped! [!]'+ colors.bcolors.ENDC)
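
For reference, each work item handed to find_blacklisted_ipvoid_mp is ((dst, src_list), customer, result_type), which is exactly the shape that function unpacks in Example #1. A minimal sketch of the arglist construction with toy addresses and made-up customer / result_type values (no elasticsearch or multiprocessing):

from collections import defaultdict

blacklist_dict = defaultdict(list)
for src, dst in [('10.0.0.5', '203.0.113.7'), ('10.0.0.6', '203.0.113.7')]:
    if src not in blacklist_dict[dst]:
        blacklist_dict[dst].append(src)

arglist = [(entry, 'acme', 'blacklisted') for entry in blacklist_dict.items()]
print(arglist[0])  # (('203.0.113.7', ['10.0.0.5', '10.0.0.6']), 'acme', 'blacklisted')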
Example #14
def scan_analysis(customer, proto, threshold, graph, graph_thresh, potential_save_dir, result_type):
    # Search will be conducted in log files
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP, DESTINATION_PORT]
    
    # restrict results to the specified protocol
    if proto != "" and proto != 'web':
        constraints = [{PROTOCOL:proto}]
    else:
        constraints = []
    
    # anything we want to filter out
    ignore = []

    scroll_id = ""
    scroll_len = 1000
    scrolling = True

    count = 0
    error_count = 0

    print(colors.bcolors.OKBLUE + '>>> Retrieving information from elasticsearch and building dictionary...')

    # build dictionary for scan detection
    scan_dict = defaultdict(list)

    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type,fields, constraints, ignore, scroll_id, scroll_len)

        # Report progress
        if (count % 10 == 0) or (count == scroll_size):
            progress_bar(count, scroll_size)

        for entry in hits:
            count += 1
            try:    
                # Get source ip, destination ip, and port of current log entry
                src = entry['fields'][SOURCE_IP][0]
                dst = entry['fields'][DESTINATION_IP][0]
                dpt = entry['fields'][DESTINATION_PORT][0]

                if dpt == '':
                    error_count += 1
                    continue

            except:
                error_count += 1
                continue

            # Set up dictionary key as source and destination ip pair
            key =  (src, dst)

            # Add all destination ports
            scan_dict[key].append(dpt)

        

        if len(hits) < 1:
            scrolling = False

   
    # Get total number of keys (unique source - destination pairs)
    total_keys = len(scan_dict)

    if not total_keys == 0:
        print('>>> Running scan analysis ... ')

        key_count = 0
        unlikely_found = 0
        likely_found = 0

        # Iterate over all the keys...
        for key in scan_dict:
            key_count += 1

            if (key_count % 20 == 0) or (key_count == total_keys):
                progress_bar(key_count, total_keys)

            # Extract values from key string
            src = key[0]
            dst = key[1]

            # Get ports that match the source-destination pair
            ports = scan_dict[key]

            # Get number of unique destination ports
            num_unique_ports = len(set(ports))

            # Get total number ports
            num_total_ports = len(ports)

            # If there are more than specified amount of ports, flag as likely scan
            if num_unique_ports > threshold:
                if graph and (num_unique_ports > graph_thresh):
                    ports = [int(i) for i in scan_dict[key]]
                    graph_scans(customer, src, dst, proto, ports, threshold, potential_save_dir)
                write_data(src, dst, ports, num_unique_ports, num_total_ports, proto, customer, result_type)
                likely_found += 1
            else:
                unlikely_found += 1

        # Report number of potential scans found    
        print(colors.bcolors.FAIL + '[!] Found ' + str(likely_found) + ' potential port scans [!]'
              + colors.bcolors.ENDC)
    else:
        print (colors.bcolors.WARNING + '[!] Querying elasticsearch failed - Verify your protocol choice or log configuration file! [!]'+ colors.bcolors.ENDC)

    if error_count > 0:
        print (colors.bcolors.WARNING + '[!] ' + str(error_count) + ' log entries with misnamed or missing field values skipped! [!]'+ colors.bcolors.ENDC)
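
The heuristic in scan_analysis flags a (src, dst) pair once it has touched more unique destination ports than the threshold. A minimal sketch with made-up flows and an assumed threshold:

from collections import defaultdict

flows = [('10.0.0.5', '10.0.0.9', str(p)) for p in range(1, 30)]  # one host probing ports 1-29
flows += [('10.0.0.7', '10.0.0.9', '443')] * 50                   # repeated traffic to a single port

scan_dict = defaultdict(list)
for src, dst, dpt in flows:
    scan_dict[(src, dst)].append(dpt)

threshold = 20
for (src, dst), ports in scan_dict.items():
    num_unique_ports = len(set(ports))
    if num_unique_ports > threshold:
        print(src + ' -> ' + dst + ': likely scan, ' + str(num_unique_ports) + ' unique ports')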
Example #15
File: concurrent.py Project: naisanza/RITA
def find_concurrent(customer, result_type):
    # Search will be conducted in log files
    doc_type = 'logs'

    # fields to return from elasticsearch query
    fields = [EVENT_ID, USER_NAME, SOURCE_IP, TIMESTAMP]
    
    # restrict results to specified customer and eventId to list of possible IDs
    constraints = []
    
    # anything we want to filter out
    ignore = []

    # Sort results by timestamp
    sort = TIMESTAMP + ':asc'

    # create dictionary to store user login info
    concurrent_dict = defaultdict(dict)

    scroll_id = ""

    scrolling = True

    scroll_len = 1000

    count = 0
    error_count = 0

    print(colors.bcolors.OKBLUE +'>>> Retrieving information from elasticsearch...')
    
    while scrolling:
        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(customer, doc_type, fields, constraints, ignore, scroll_id, scroll_len, sort)

        # For every unique username (used as dict key), make a dictionary of event activity
        for entry in hits:
            try:
                user  =  entry['fields'][USER_NAME][0]
                event = entry['fields'][EVENT_ID][0]
                src   = entry['fields'][SOURCE_IP][0]
            except:
                error_count += 1
                continue
            
            # If user name has not been added to dictionary, add it and set login counts to 0
            if user not in concurrent_dict:
                concurrent_dict[user]['logged_on'] = False
                concurrent_dict[user]['concurrent'] = 0
                concurrent_dict[user]['max_concurrent'] = 0
                concurrent_dict[user]['src_list'] = []
            
            # Add only unique source ips
            if src not in concurrent_dict[user]['src_list']:
                concurrent_dict[user]['src_list'].append(src)

            # If event id indicates a logon mark the user as such, and add to the concurrent count if
            # the user is already logged on
            if event == LOG_ON:
                if concurrent_dict[user]['logged_on'] == True:
                    concurrent_dict[user]['concurrent'] += 1
                    if concurrent_dict[user]['max_concurrent'] < concurrent_dict[user]['concurrent']:
                        concurrent_dict[user]['max_concurrent'] = concurrent_dict[user]['concurrent']
                else:
                    concurrent_dict[user]['logged_on'] = True

            # If the event id indicates a logoff, reduce the concurrent count and, if the concurrent count is
            # now zero, mark the user as logged off
            elif event == LOG_OFF:
                if 0 < concurrent_dict[user]['concurrent']:
                    concurrent_dict[user]['concurrent'] -= 1
                if concurrent_dict[user]['concurrent'] == 0:
                    concurrent_dict[user]['logged_on'] = False

        # stop scrolling if no more hits
        if len(hits) < 1:
            scrolling = False
        else:
            count += len(hits)
            # Report progress
            if (count % 10 == 0) or (count == scroll_size):
                progress_bar(count, scroll_size)

    if not (len(concurrent_dict) == 0):
        num_found = 0
        print('>>> Checking for concurrent logins and writing results to elasticsearch... '+ colors.bcolors.ENDC)

        # record all users with concurrent logins
        for user, data in concurrent_dict.iteritems():
            if data['max_concurrent'] > 0:
                num_found += 1
                write_data(user, data, customer, result_type)

        print(colors.bcolors.WARNING + '[+] ' + str(num_found) + ' concurrent logins found! [+]'+ colors.bcolors.ENDC)
    else:
        print (colors.bcolors.WARNING + '\nQuerying elasticsearch failed - Verify your log configuration file!'+ colors.bcolors.ENDC)

    if error_count > 0:
        print (colors.bcolors.WARNING + '[!] ' + str(error_count) + ' log entries with misnamed or missing field values skipped! [!]'+ colors.bcolors.ENDC)
Example #16
def find_blacklisted_ipvoid(customer, result_type):
    global CURR_DONE
    global TOTAL_TO_DO

    CURR_DONE.value = 0

    # Analysis will be done on log files, not results
    doc_type = 'logs'

    # restrict results to specified customer
    constraints = []

    # anything we want to filter out
    ignore = []

    print(colors.bcolors.OKBLUE +
          '>>> Retrieving information from elasticsearch...')

    # fields to return from elasticsearch query
    fields = [SOURCE_IP, DESTINATION_IP]

    scroll_id = ""
    scroll_len = 1000

    scrolling = True

    count = 0
    error_count = 0

    # build dictionary for blacklist detection
    blacklist_dict = defaultdict(list)

    while scrolling:

        # Retrieve data
        hits, scroll_id, scroll_size = ht_data.get_data(
            customer, doc_type, fields, constraints, ignore, scroll_id,
            scroll_len)

        # For every unique destination ip (used as dict key), make a list of all
        # src ips that connect to it
        for entry in hits:
            count += 1
            try:
                dst = entry['fields'][DESTINATION_IP][0]
                src = entry['fields'][SOURCE_IP][0]
            except:
                error_count += 1
                continue

            # Verify that source IP is internal and that destination ip is external
            if len(dst) != 0 and len(src) != 0:
                if (filter_ip(src) == False) and (filter_ip(dst) == True):
                    # Check for duplicate source IPs
                    try:
                        if src not in blacklist_dict[dst]:
                            blacklist_dict[dst].append(src)
                    except:
                        continue

        if len(hits) < 1:
            scrolling = False
        else:
            progress_bar(count, scroll_size)

    # Get total number of keys (unique destination IPs)
    total_keys = len(blacklist_dict)

    # Verify that ES query actually returned some results
    if not total_keys == 0:
        print('>>> Querying blacklist....')

        # Get the multiprocessing stuff ready
        TOTAL_TO_DO.value = len(blacklist_dict)
        workers = Pool(64)

        # create parameter list for threads and keys
        arglist = [(entry, customer, result_type)
                   for entry in blacklist_dict.items()]

        # workers.map(find_blacklisted_ipvoid_mp, blacklist_dict.items())
        workers.map(find_blacklisted_ipvoid_mp, arglist)
    else:
        print(
            colors.bcolors.WARNING +
            '[!] Querying elasticsearch failed - Verify your log configuration file! [!]'
            + colors.bcolors.ENDC)

    if error_count > 0:
        print(
            colors.bcolors.WARNING + '[!] ' + str(error_count) +
            ' log entries with misnamed or missing field values skipped! [!]' +
            colors.bcolors.ENDC)