Example #1
def port_flap(host, yang_message, error, tag, current_case=None):
    '''
    Workflow for fixing port flapping, similar to the ifdown function.
    :param host: the host from which the event originated.
    :param yang_message: the YANG data of the event.
    :param error: the event's error.
    :param tag: the event's tag.
    :param current_case: the current OATS case id.
    :return: error, tag, comment, changes, success (bool)
    '''
    conf = 'No changes'
    success = False
    interface = oatsdbhelpers.get_interface(error, yang_message)
    comment = 'Port Flapping on ' + host + ' detected. '
    if current_case is None or current_case == 'None':
        current_case = oatspsql.create_case(
            error, host, solution='Case created in salt: `tshoot.port_flap`.')
    interface_neighbor = oatsnb.get_interface_neighbor(host,
                                                       interface,
                                                       case=current_case)

    #neighbors = oatsnb.get_neighbors(interface_neighbor, case=current_case)
    #device_up = oatssalthelpers.check_device_connectivity(neighbors, interface_neighbor, case=current_case)

    oatssalthelpers.if_shutdown(host, interface, case=current_case)
    conf = oatssalthelpers.if_noshutdown(host, interface, case=current_case)
    success = oatssalthelpers.ping(host,
                                   interface_neighbor,
                                   check_connectivity=True,
                                   case=current_case)
    if success:
        comment += ('Resolved Port Flapping on Interface `' + interface + '`.')
        oatssalthelpers.post_slack(comment, case=current_case)
        oatspsql.close_case(current_case)
    else:
        success = False
        oatspsql.update_case(
            current_case,
            solution='Port flapping could not be resolved. Technician needed.',
            status=oatspsql.Status.ONHOLD.value)
        comment = ('Could not fix port flapping status of `' + interface +
                   '` on host ' + host + '.')
        oatssalthelpers.post_slack(comment, case=current_case)

    return {
        'error': error,
        'tag': tag,
        'comment': comment,
        'changes': conf,
        'success': success
    }
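# Sketch only: a hypothetical invocation for illustration. In OATS the arguments
# arrive from the salt event bus; the yang_message shape and tag below are
# made-up placeholders, not values from the project.
result = port_flap(
    host='R1',
    yang_message={'interfaces': {'interface': {'GigabitEthernet1': {}}}},
    error='INTERFACE_FLAP',
    tag='syslog/R1/INTERFACE_FLAP/port_flap')
print(result['comment'], result['success'])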
Example #2
def test_update_case():
    caseid = oatspsql.create_case(error='test',
                                  host='test',
                                  description='Test',
                                  status='new')
    updated = oatspsql.update_case(caseid, solution='New stuff')
    deleted = oatspsql.delete_case(caseid)
    assert updated is not None and caseid is not None and deleted is True
Example #3
def compress(data, host, timestamp, severity, error, sensor_type,
             event_name, correlate_for=10, use_oats_case=False):
    '''
    Takes events of the same kind and compresses them. Once the first event reaches
    this function, it stops the propagation of further events of the same kind for
    the given amount of time. Once that time has passed, it sends the first event
    to the salt master.
    :param data: the data of the event.
    :param host: the host from which the event originated.
    :param timestamp: the event's timestamp.
    :param severity: the event's severity.
    :param error: the event's error, e.g. OSPF_NEIGHBOR_DOWN.
    :param sensor_type: the sensor which detected the event, e.g. syslog.
    :param event_name: the event's name.
    :param correlate_for: the amount of time to compress for (in seconds).
    :param use_oats_case: if set, an OATS case is created in psql.
    :return: None
    '''
    oatsinflux.write_event(host, timestamp, sensor_type, event_name, severity, data)
    cache_id = 'compress' + event_name
    # NOTE: a shared, module-level lock is assumed here (like `cache`); a Lock()
    # created per call would not synchronize anything.
    lock.acquire()
    current_case = None
    if cache is None or cache_id not in cache or error not in cache[cache_id]:
        # first thread initializes and populates dict
        logger.debug('Starting compression of [{0}] events...'.format(event_name))
        __init_cache(error, cache_id, correlate_for)
    else:
        # later threads increment counter
        cache[cache_id][error]['counter'] += 1
        lock.release()
        return
    lock.release()
    if use_oats_case:
        current_case = __create_db_case(error, host, 'compress')
        oatspsql.update_case(current_case,
                             solution='Waiting for {0} seconds to compress {1} events.'.format(correlate_for, event_name))

    # compress events
    time.sleep(correlate_for)
    logger.debug('Compression finished, amount of compressed [{0}] events: {1}.'
                 .format(event_name, cache[cache_id][error]['counter']))
    if use_oats_case:
        __update_db_case(current_case, cache[cache_id][error]['counter'], event_name)
    EventProcessor.process_event(data=data, host=host, timestamp=timestamp,
                                 sensor_type=sensor_type, event_name=event_name, severity=severity,
                                 case=current_case, influx_write=False)
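# Sketch only: hypothetical values showing how a burst of identical events might
# reach compress from several sensor threads. All but the first call return
# immediately after incrementing the counter; the first waits out correlate_for
# and forwards a single event.
import threading

def _fire():
    compress(data={'peer': '10.0.0.2'}, host='R1',
             timestamp='2018-01-01T00:00:00Z', severity=5,
             error='OSPF_NEIGHBOR_DOWN', sensor_type='syslog',
             event_name='syslog/R1/OSPF_NEIGHBOR_DOWN/nbr_down',
             correlate_for=10)

for _ in range(5):
    threading.Thread(target=_fire).start()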
Example #4
    :param case: ID to update the case information in the psql database
    :return: a list of hostnames
    '''
    # custom field on ip address to poll ospf area
    nb = connect()
    host = str(host)
    ospf_nb = []
    logger.debug('Trying to get ospf_neighbors for host {0}'.format(host))
    try:
        neighborip = nb.ipam.ip_addresses.filter(device=host)
        for nbip in neighborip:
            if nbip.custom_fields["OSPF_area"] is not None:
                ospf_nb.append(nbip.custom_fields["OSPF_area"])
        if case:
            sol = 'Got OSPF neighbors of ' + host
            oatspsql.update_case(case_id=case, solution=sol)
    except Exception:
        logger.exception(
            'Exception in oatsnb.get_ospf_neighbors for host {0}'.format(host))
    logger.debug('Got ospf neighbors for host {0}'.format(host))
    return ospf_nb


def get_vrf_ip(host):
    '''
    Function to get a custom field on the device in netbox which specifies the management IP address.
    :param host: hostname
    :return: the management IP address of the device
    '''
    # custom field on device to poll salt master
    nb = connect()
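    # The snippet ends here. A minimal sketch of how the body might continue,
    # mirroring the custom-field pattern of get_ospf_neighbors above; the field
    # name 'vrf_ip' and the exact lookup are assumptions, not project code.
    try:
        device = nb.dcim.devices.get(name=str(host))
        return device.custom_fields.get('vrf_ip')
    except Exception:
        logger.exception('Exception in oatsnb.get_vrf_ip for host {0}'.format(host))
        return None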
Example #5
def out_discards_exceeded(data, host, timestamp, current_case):
    '''
    Function that loads a policy onto a device affected by a high amount of discarded packets.
    The policy throttles the traffic from one IP to another with a certain port number.
    The source IP, destination IP, and port number are gathered by evaluating netflow data.
    After a certain amount of time the policy is removed again.
    :param data: contains the affected interface and the value of discarded packets.
    :param host: the affected host.
    :param timestamp: the timestamp of the event.
    :param current_case: the current OATS case id.
    :return: error, comment, changes, slack-post-status (bool), success (bool)
    '''
    if current_case is None or current_case == 'None':
        current_case = oatspsql.create_case(
            "OUT_DISCARDS_EXCEEDED",
            host,
            solution='Case created in salt: `tshoot.out_discards_exceeded`.')

    src_flow = None
    dst_flow_port = None
    # time out the while loop after 60 seconds
    timeout = time.time() + 60
    comment = ''
    while src_flow is None:
        flows = oatsinflux.get_type_data('netflow',
                                         timestamp,
                                         'netflow/*/data',
                                         30,
                                         host=host)
        src_flow = oatssalthelpers.get_src_flow(flows, 15000)
        time.sleep(1)
        if time.time() > timeout:
            break
    if src_flow is not None:
        # NetFlow v9 field IDs: 8 = IPV4_SRC_ADDR, 11 = L4_DST_PORT, 12 = IPV4_DST_ADDR
        dst_flow_port = src_flow['11']
        interface = data['name']
        src_ip_address = src_flow['8']
        dst_ip_address = src_flow['12']
        oatspsql.update_case(
            current_case,
            solution=
            'Found responsible flow: src_ip = `{0}`, dst_ip = `{1}`, port_number = `{2}`'
            .format(src_ip_address, dst_ip_address, dst_flow_port))
        minion = oatsnb.get_hostname(host)
        oatssalthelpers.apply_policy(minion, 8000, interface, src_ip_address,
                                     dst_ip_address, dst_flow_port)
        oatssalthelpers.remove_policy(minion, interface, src_ip_address,
                                      dst_ip_address, dst_flow_port)
        comment = "Discarded packets on host {0} on egress interface `{1}` exceeded threshhold. " \
                  "Destination port of traffic: `{2}`.\n".format(host, data['name'], dst_flow_port)
        comment += "Applied traffic throttlinc policy for 120 seconds.\n"
    else:
        comment += 'Could not determine source of traffic, possible DDoS attack detected' \
                   ' because traffic source port is `port 0`.'

    slack_status = oatssalthelpers.post_slack(comment, case=current_case)

    ret = {
        'error': 'OUT_DISCARDS',
        'comment': comment,
        'changes': 'conf',
        'slack-post-status': slack_status,
        'success': bool(dst_flow_port)
    }
    return ret
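# Illustration only: a hypothetical flow record as oatssalthelpers.get_src_flow
# might return it, keyed by NetFlow v9 field IDs (the values are made up).
example_flow = {
    '8': '10.0.0.5',    # IPV4_SRC_ADDR: source of the offending traffic
    '11': '443',        # L4_DST_PORT: port used for the throttling policy match
    '12': '10.0.1.7',   # IPV4_DST_ADDR: destination of the traffic
}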
Example #6
def ifdown(host, yang_message, error, tag, current_case=None):
    '''
    Function that gathers data in the network and executes a workflow according to the data.
    Is triggered by the salt system once an INTERFACE_DOWN event arrives on the salt
    event bus. Will try to fix the error or send a notification if it is unable to
    do so.
    :param host: the host that started this workflow.
    :param yang_message: the YANG data of the event.
    :param error: the event's error.
    :param tag: the event's tag.
    :param current_case: the current OATS case id.
    :return: error, tag, a comment, configuration changes, success (bool).
    '''
    conf = 'No changes'
    success = False
    interface = oatsdbhelpers.get_interface(error, yang_message)
    comment = 'Interface down status on host ' + host + ' detected. '
    if current_case is None or current_case == 'None':
        current_case = oatspsql.create_case(
            error, host, solution='Case created in salt: `tshoot.ifdown`.')
    interface_neighbor = oatsnb.get_interface_neighbor(host,
                                                       interface,
                                                       case=current_case)

    neighbors = oatsnb.get_neighbors(interface_neighbor, case=current_case)
    device_up = oatssalthelpers.check_device_connectivity(neighbors,
                                                          interface_neighbor,
                                                          case=current_case)

    if device_up:
        # cycle affected interface
        oatssalthelpers.if_shutdown(host, interface, case=current_case)
        conf = oatssalthelpers.if_noshutdown(host,
                                             interface,
                                             case=current_case)
        # check if cycle was successful
        success = oatssalthelpers.ping(host,
                                       interface_neighbor,
                                       check_connectivity=True,
                                       case=current_case)
        if success:
            comment += ('Config for Interface `' + interface +
                        '` automatically changed from down to up')
            # TODO: remove, only useful for debugging
            oatssalthelpers.post_slack(comment, case=current_case)
            oatspsql.close_case(current_case)
        else:
            oatspsql.update_case(current_case,
                                 solution=error +
                                 ' could not be resolved. Technician needed.',
                                 status=oatspsql.Status.ONHOLD.value)
            comment = ('Could not fix down status of `' + interface +
                       '` on host ' + host + '.')
            oatssalthelpers.post_slack(comment, case=current_case)
    else:
        # TODO: powercycle, check power consumation
        success = False
        oatspsql.update_case(current_case,
                             solution='Device ' + interface_neighbor +
                             ' is unreachable. Technician needed.',
                             status=oatspsql.Status.ONHOLD.value)
        comment += ('Interface `' + interface + '` on host ' + host +
                    ' down. Neighbor ' + interface_neighbor + ' is down.')
        oatssalthelpers.post_slack(comment, case=current_case)
        comment += ' Could not restore connectivity - Slack Message sent.'

    return {
        'error': error,
        'tag': tag,
        'comment': comment,
        'changes': conf,
        'success': success
    }
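# Sketch only: one way to exercise ifdown in isolation with mocked helper modules.
# Assumes this module is importable as `tshoot`; the patch targets are assumptions.
from unittest import mock

def test_ifdown_recovers_interface():
    with mock.patch('tshoot.oatsnb'), mock.patch('tshoot.oatspsql'), \
            mock.patch('tshoot.oatsdbhelpers') as dbh, \
            mock.patch('tshoot.oatssalthelpers') as sh:
        dbh.get_interface.return_value = 'GigabitEthernet1'
        sh.check_device_connectivity.return_value = True
        sh.ping.return_value = True
        ret = ifdown('R1', {}, 'INTERFACE_DOWN', 'syslog/R1/INTERFACE_DOWN')
        assert ret['success'] is True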
Example #7
                                                       case=current_case)
    n_of_neighbors = len(
        oatsnb.get_ospf_neighbors(interface_neighbor, case=current_case))
    oatssalthelpers.ospf_shutdown(interface_neighbor,
                                  process_number,
                                  case=current_case)
    async_result = pool.apply_async(
        oatssalthelpers.wait_for_event,
        ('syslog/*/OSPF_NEIGHBOR_UP/ospf_nbrs_up', 120, current_case))
    conf = oatssalthelpers.ospf_noshutdown(interface_neighbor,
                                           process_number,
                                           case=current_case)
    success = async_result.get()
    if success:
        oatspsql.update_case(
            current_case,
            'Successfully restarted OSPF process on host {0}.'.format(
                interface_neighbor), oatspsql.Status.DONE.value)
        comment += ' OSPF process restarted successfully.'
    else:
        oatspsql.update_case(
            current_case,
            'Unable to restart OSPF process on host {0}. Host might be offline.'
            ' Technician needed.'.format(interface_neighbor),
            oatspsql.Status.ONHOLD.value)
    slack_post = oatssalthelpers.post_slack(comment, case=current_case)

    ret = {
        'error': error,
        'tag': tag,
        'comment': comment,
        'changes': conf,
Example #8
def aggregate_distinct(data, host, timestamp, severity, error, sensor_type,
                       event_name, distinct_events, aggregation_event_name=None,
                       correlate_for=None, use_oats_case=False):
    '''
    Takes distinct events and aggregates them. The first event starts the aggregation
    for the given amount of time. Each time an additional event listed in distinct_events
    reaches this function, a counter is incremented. Once the time has passed, the counters
    are evaluated. If each counter reaches the number stated in distinct_events, an event
    with the event name "aggregation_event_name" is generated. Otherwise an event with the
    event name "event_name" is generated.
    Note: the events do not have to be distinct, but for aggregation of identical events
    the use of aggregate_identical is suggested.
    :param data: the data of the event.
    :param host: the host from which the event originated.
    :param timestamp: the event's timestamp.
    :param severity: the event's severity.
    :param error: the event's error, e.g. OSPF_NEIGHBOR_DOWN.
    :param sensor_type: the sensor which detected the event, e.g. syslog.
    :param event_name: the event's name.
    :param distinct_events: dict of the form { event_name: x_amount_of_events, event_name2: y_amount_of_events },
        e.g. { 'syslog/*/INTERFACE_CHANGED/down': 2, 'syslog/*/INTERFACE_CHANGED/up': 2 }
    :param aggregation_event_name: the event name to use if the aggregation is successful.
    :param correlate_for: the amount of time to aggregate for.
    :param use_oats_case: if set, an OATS case is created in psql.
    :return: None
    '''
    oatsinflux.write_event(host, timestamp, sensor_type, event_name, severity, data)
    cache_id = 'aggregate_distinct' + error
    # NOTE: a shared, module-level lock is assumed here (like `cache`); a Lock()
    # created per call would not synchronize anything.
    lock.acquire()
    event_names = []
    current_case = None

    if cache is None or cache_id not in cache or host + event_name not in cache[cache_id]:
        logger.debug('Starting aggregation of distinct events...')
        # first thread initializes and populates dict
        __init_cache(host + event_name, cache_id, correlate_for, host=host,
                     additional_events=distinct_events.keys())
        event_names.append(host + event_name)
    else:
        logger.debug('Additional (distinct) event detected, incrementing counter...')
        # later threads increment counter
        cache[cache_id][host + event_name]['counter'] += 1
        event_names.append(host + event_name)
        lock.release()
        return
    lock.release()
    if use_oats_case:
        current_case = __create_db_case(error, host, 'aggregate')
        oatspsql.update_case(current_case,
                             solution='Waiting for {0} seconds to aggregate distinct events.'.format(correlate_for))

    # wait for additional events
    time.sleep(correlate_for)
    success = True
    for event in event_names:
        # cache keys are prefixed with the host name; strip it to index distinct_events
        if cache[cache_id][event]['counter'] < distinct_events[event[len(host):]]:
            success = False
            break
    if success:
        if use_oats_case:
            oatspsql.update_case(current_case,
                                 solution='Aggregation successful: sending `{0}` event to salt master.'
                                 .format(aggregation_event_name))
        logger.debug('Aggregation successful: sending [{0}] event.'
                     .format(aggregation_event_name))
        EventProcessor.process_event(data=data, host=host, timestamp=timestamp,
                                     sensor_type=sensor_type, event_name=aggregation_event_name, severity=severity,
                                     case=current_case, influx_write=False)
    else:
        if use_oats_case:
            oatspsql.update_case(current_case,
                                 solution='Aggregation not successful: sending `{0}` event to salt master.'
                                 .format(event_name))
        logger.debug('Aggregation not successful: sending [{0}] event.'
                     .format(event_name))
        EventProcessor.process_event(data=data, host=host, timestamp=timestamp,
                                     sensor_type=sensor_type, event_name=event_name, severity=severity,
                                     case=current_case, influx_write=False)
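# Sketch only (hypothetical tags and values): aggregate one interface-down and one
# interface-up event within a 10 second window into a single 'interface_flap' event.
aggregate_distinct(
    data={'interface': 'GigabitEthernet1'}, host='R1',
    timestamp='2018-01-01T00:00:00Z', severity=5,
    error='INTERFACE_CHANGED', sensor_type='syslog',
    event_name='syslog/*/INTERFACE_CHANGED/down',
    distinct_events={'syslog/*/INTERFACE_CHANGED/down': 1,
                     'syslog/*/INTERFACE_CHANGED/up': 1},
    aggregation_event_name='syslog/*/INTERFACE_CHANGED/interface_flap',
    correlate_for=10)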
Example #9
def __update_db_case(current_case, counter, event_name):
    oatspsql.update_case(current_case,
                         solution='Time passed: `{0}` event counter is {1}. Sending `{0}`'
                                  ' event to salt master'.format(event_name, counter))
Example #10
def aggregate_identical(data, host, timestamp, severity, error, sensor_type,
                        event_name, n_of_events=None, alternative_event_name=None,
                        correlate_for=None, use_oats_case=False):
    '''
    Takes identical events and aggregates them. The first event starts the aggregation
    for the given amount of time. Each time an additional event of the same kind reaches
    this function, a counter is incremented. Once the time has passed, the counter is
    evaluated. If the counter matches the number stated in n_of_events, an event with
    the event name "event_name" is generated. Otherwise an event with the event name
    "alternative_event_name" is generated.
    :param data: the data of the event.
    :param host: the host from which the event originated.
    :param timestamp: the event's timestamp.
    :param severity: the event's severity.
    :param error: the event's error, e.g. OSPF_NEIGHBOR_DOWN.
    :param sensor_type: the sensor which detected the event, e.g. syslog.
    :param event_name: the event name that is used when the aggregation is successful.
    :param n_of_events: the amount of events needed for the aggregation to be successful.
    :param alternative_event_name: the alternative event name if the aggregation is not successful.
    :param correlate_for: the amount of time to aggregate for.
    :param use_oats_case: if set, an OATS case is created in psql.
    :return: None
    '''
    oatsinflux.write_event(host, timestamp, sensor_type, event_name, severity, data)
    cache_id = 'aggregate' + event_name
    # NOTE: a shared, module-level lock is assumed here (like `cache`); a Lock()
    # created per call would not synchronize anything.
    lock.acquire()
    current_case = None
    if cache is None or cache_id not in cache or error not in cache[cache_id]:
        # first thread initializes and populates dict
        logger.debug('Starting aggregation of [{0}] events'.format(event_name))
        __init_cache(error, cache_id, correlate_for)
    else:
        logger.debug('Additional [{0}] event detected. Incrementing counter...'
                     .format(event_name))
        # later threads increment counter
        cache[cache_id][error]['counter'] += 1
        lock.release()
        return
    lock.release()
    if use_oats_case:
        current_case = __create_db_case(error, host, 'aggregate')
        oatspsql.update_case(current_case,
                             solution='Waiting for {0} seconds to aggregate events.'
                                      ' Required amount of events: {1}'.format(correlate_for, n_of_events))

    # wait for additional events
    time.sleep(correlate_for)
    logger.debug('Aggregation finished. Event counter for event [{0}] is: {1}.'
                 .format(event_name, cache[cache_id][error]['counter']))
    if cache[cache_id][error]['counter'] == n_of_events:
        if use_oats_case:
            __update_db_case(current_case, cache[cache_id][error]['counter'], event_name)
        logger.debug('Aggregation successful: sending [{0}] event.'
                     .format(event_name))
        EventProcessor.process_event(data=data, host=host, timestamp=timestamp,
                                     sensor_type=sensor_type, event_name=event_name, severity=severity,
                                     case=current_case, influx_write=False)
    else:
        if use_oats_case:
            __update_db_case(current_case, cache[cache_id][error]['counter'],
                             alternative_event_name)
        logger.debug('Aggregation not successful: sending [{0}] event.'
                     .format(alternative_event_name))
        EventProcessor.process_event(data=data, host=host, timestamp=timestamp,
                                     sensor_type=sensor_type, event_name=alternative_event_name, severity=severity,
                                     case=current_case, influx_write=False)
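# Sketch only (hypothetical tags and values): treat exactly five OSPF_NEIGHBOR_DOWN
# events within 10 seconds as one 'all_nbrs_down' event, anything else as 'nbr_down'.
aggregate_identical(
    data={'peer': '10.0.0.2'}, host='R1',
    timestamp='2018-01-01T00:00:00Z', severity=5,
    error='OSPF_NEIGHBOR_DOWN', sensor_type='syslog',
    event_name='syslog/*/OSPF_NEIGHBOR_DOWN/all_nbrs_down',
    n_of_events=5,
    alternative_event_name='syslog/*/OSPF_NEIGHBOR_DOWN/nbr_down',
    correlate_for=10)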