Example #1
def validate_backup_repo_exists(client_config, repository):
    """[summary]
    Validates backup repository exists in ES/OS

    Args:
        client_config ([dict]): [Client configuration]
        repository ([str]): [Repository to verify exists]

    Raises:
        Exception: [On error, print error and retry]

    Returns:
        [bool]: [Does backup repository exist]
    """
    elastic_connection = es.build_es_connection(client_config)
    try:
        repositories = elastic_connection.cat.repositories(format='json')
        elastic_connection.close()
        for record in repositories:
            if repository == record['id']:
                print(
                    f"Backup repository {repository} exists and is registered")
                return True
    except Exception as e:
        elastic_connection.close()
        print("Operation failed - Validate backup repo exists")
        raise Exception(e)
    # If it makes it this far the repo does not exist, fail
    print(f"Backup repository {repository} not registered")
    return False
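
For comparison, here is a minimal standalone sketch of the same check using elasticsearch-py directly rather than the project's es.build_es_connection helper; the host URL and repository name are placeholders.

from elasticsearch import Elasticsearch

def repo_exists(hosts, repository):
    # Hypothetical host/repository values; cat.repositories returns one
    # record per registered snapshot repository.
    client = Elasticsearch(hosts)
    try:
        repositories = client.cat.repositories(format='json')
        return any(record['id'] == repository for record in repositories)
    finally:
        client.close()

print(repo_exists(["http://localhost:9200"], "my-backup-repo"))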
Example #2
def forcemerge_indices(client_config, index, index_forcemerge_policies):
    elastic_connection = es.build_es_connection(client_config)
    newest_record = ""
    newest_record = es.get_newest_document_date_in_index(
        client_config, index, elastic_connection)
    # make sure newest record is not empty
    if newest_record != "":
        # Get the index specific forcemerge policy
        policy = es.check_index_forcemerge_policy(index,
                                                  index_forcemerge_policies)
        # Get policy forcemerge days from specific policy
        policy_days = index_forcemerge_policies[policy]
        # Get current datetime
        current_date = datetime.utcnow()
        # Figure out how many days since current_date vs. newest_record
        days_ago = (current_date - newest_record).days
        # Check if days_ago is greater than or equal to the policy days
        # If so, forcemerge the index
        if days_ago >= policy_days:
            # Forcemerge the index and confirm every shard reported success
            status = elastic_connection.indices.forcemerge(index)
            if '_shards' in status and 'total' in status['_shards'] \
                    and 'successful' in status['_shards'] \
                    and status['_shards']['total'] == status['_shards']['successful']:
                print("Forcemerge for " + index + " successful")
            else:
                print("Forcemerge for " + index + " unsuccessful")
    elastic_connection.close()
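
The age check above reduces to a simple date difference against a per-policy day threshold; a self-contained sketch with illustrative values follows.

from datetime import datetime

newest_record = datetime(2021, 1, 1, 12, 0, 0)  # newest @timestamp seen in the index (example)
policy_days = 30                                # forcemerge policy threshold in days (example)
days_ago = (datetime.utcnow() - newest_record).days
if days_ago >= policy_days:
    print(f"Newest document is {days_ago} days old - index is eligible for forcemerge")
else:
    print(f"Newest document is {days_ago} days old - skipping forcemerge")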
Example #3
def delete_snapshot_in_repository(client_config, repository, snapshot):
    """[summary]
    Deletes a snapshot from a backup repository

    Args:
        client_config ([dict]): [Client configuration]
        repository ([str]): [Backup repository]
        snapshot ([str]): [Snapshot full name]

    Raises:
        Exception: [On error, print error and retry]

    Return:
        ([bool]): [Did snapshot get removed]
    """
    elastic_connection = es.build_es_connection(client_config)
    try:
        delete_status = elastic_connection.snapshot.delete(
            repository, snapshot=snapshot)
        elastic_connection.close()
        if 'acknowledged' in delete_status:
            if delete_status['acknowledged']:
                print("Snapshot " + snapshot + " deleted successfully")
                return True
            else:
                print("Snapshot " + snapshot + " failed to delete successfully")
                return False
    except Exception as e:
        elastic_connection.close()
        print("Operation failed - Delete snapshot " +
              snapshot + " from " + repository)
        raise Exception(e)
    return False
Example #4
def get_snapshots_in_repository(client_config, repository):
    """[summary]
    Gets all snapshots from backup repository

    Args:
        client_config ([dict]): [Client configuration]
        repository ([str]): [Backup repository name]

    Raises:
        Exception: [On error, print error and retry]

    Returns:
        [dict]: [Dictionary of all snapshot information]
    """
    elastic_connection = es.build_es_connection(client_config)
    snapshots = {'snapshots': []}
    try:
        snapshots = elastic_connection.snapshot.get(repository, '_all')
        elastic_connection.close()
    except Exception as e:
        elastic_connection.close()
        print("Operation failed - Get snapshots from " + repository)
        raise Exception(e)
    return snapshots
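
The snapshot.get response is a dict with a 'snapshots' list; a minimal sketch of iterating it with a bare elasticsearch-py client (placeholder host and repository name) follows.

from elasticsearch import Elasticsearch

client = Elasticsearch(["http://localhost:9200"])  # placeholder host
try:
    snapshots = client.snapshot.get("my-backup-repo", "_all")  # placeholder repository
    for snapshot in snapshots.get('snapshots', []):
        print(snapshot['snapshot'], snapshot.get('state'))
finally:
    client.close()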
Example #5
def delete_old_indices(client_config, index, index_retention_policies):
    """Deletes indices past retention policy

    Args:
        client_config (dict): Client configuration
        index (str): Index name
        index_retention_policies (dict): Retention policy
    """
    elastic_connection = es.build_es_connection(client_config)
    newest_record = ""
    newest_record = es.get_newest_document_date_in_index(
        client_config, index, elastic_connection)
    # make sure newest record is not empty
    if newest_record != "":
        # Get the index specific retention policy
        policy = es.check_index_retention_policy(index,
                                                 index_retention_policies)
        # Get policy retention days from specific policy
        policy_days = index_retention_policies[policy]
        # Get current datetime
        current_date = datetime.utcnow()
        # Figure out how many days since current_date vs. newest_record
        days_ago = (current_date - newest_record).days
        # Check if days_ago is greater than or equal to policy date
        # If greater than or equal to policy date, delete index
        if days_ago >= policy_days:
            # Delete old index
            es.delete_index(client_config, index)
    elastic_connection.close()
Example #6
def get_index_template(client_config, template_name):
    try:
        elastic_connection = es.build_es_connection(client_config)
        index_template = elastic_connection.indices.get_template(template_name)
        elastic_connection.close()
        return index_template
    except Exception:
        return "Not found"
Example #7
def reingest_data(json_data, esclient):
    elasticsearch_connection = es.build_es_connection(clients[esclient])
    results = es.get_list_by_chunk_size(json_data, 100)
    for result in results:
        es.bulk_insert_data_to_es(elasticsearch_connection,
                                  result,
                                  "accounting",
                                  bulk_size=100)
    elasticsearch_connection.close()
Example #8
def check_for_mapping_conflicts(client_config, indices, compare_mapping):
    elastic_connection = es.build_es_connection(client_config)
    for index in indices:
        mapping = elastic_connection.indices.get_mapping(index)
        index_mapping = mapping[index]['mappings']['properties']
        differences = []
        for diff in list(dictdiffer.diff(index_mapping, compare_mapping)):
            differences.append(diff)
        if len(differences) > 0:
            print("Index template does not match index " + index + ". Changes below")
            print(differences)
    elastic_connection.close()
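
dictdiffer.diff compares nested dictionaries and yields (action, path, values) tuples; a small self-contained sketch with two hypothetical field mappings follows.

import dictdiffer

index_mapping = {"message": {"type": "text"}, "port": {"type": "integer"}}
compare_mapping = {"message": {"type": "text"}, "port": {"type": "long"}}
for diff in dictdiffer.diff(index_mapping, compare_mapping):
    print(diff)
# -> ('change', 'port.type', ('integer', 'long'))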
Example #9
def allocate_indices(client_config, index, index_allocation_policies):
    """Processes index allocations per index age

    Args:
        client_config (dict): Client configuration
        index (str): Index name
        index_allocation_policies (dict): Allocation policy
    """
    elastic_connection = es.build_es_connection(client_config)
    newest_record = ""
    newest_record = es.get_newest_document_date_in_index(
        client_config, index, elastic_connection)
    # make sure newest record is not empty
    if newest_record != "":
        # Get the index specific allocation policy
        policy = es.check_index_allocation_policy(index,
                                                  index_allocation_policies)
        # Get policy allocation days from specific policy
        policy_days = index_allocation_policies[policy]
        # Get current datetime
        current_date = datetime.utcnow()
        # Figure out how many days since current_date vs. newest_record
        days_ago = (current_date - newest_record).days
        # Check if days_ago is greater than or equal to each tier's policy days
        # The last tier whose threshold has been reached wins
        policy_days = dict(
            sorted(policy_days.items(), key=lambda item: item[1]))
        allocation_type = ''
        for key, value in policy_days.items():
            if value <= days_ago:
                allocation_type = key
        if allocation_type != '':
            # Change index allocation per policy
            index_settings = elastic_connection.indices.get_settings(
                index=index)
            index_settings = index_settings[index]['settings']['index']
            box_type = 'hot'
            if 'routing' in index_settings:
                if 'allocation' in index_settings['routing']:
                    if 'require' in index_settings['routing']['allocation']:
                        if 'box_type' in index_settings['routing'][
                                'allocation']['require']:
                            box_type = index_settings['routing']['allocation'][
                                'require']['box_type']
            if allocation_type != box_type:
                print("Changing allocation of index " + str(index) + \
                " to " + str(allocation_type))
                elastic_connection.indices.put_settings(
                    index=index,
                    body={
                        "index.routing.allocation.require.box_type":
                        allocation_type
                    })
    elastic_connection.close()
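
The tier selection above sorts the policy by its day thresholds and keeps the last tier whose threshold the index age has reached; in isolation, with an illustrative hot/warm/cold policy:

policy_days = {"cold": 90, "hot": 0, "warm": 30}   # illustrative allocation policy
days_ago = 45                                      # example index age in days

# Sort tiers by ascending day threshold, then keep the last tier reached
policy_days = dict(sorted(policy_days.items(), key=lambda item: item[1]))
allocation_type = ''
for key, value in policy_days.items():
    if value <= days_ago:
        allocation_type = key
print(allocation_type)  # -> warm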
Example #10
def apply_forcemerge_to_indices(indices, index_forcemerge_policies,
                                client_config):
    elastic_connection = es.build_es_connection(client_config)
    with ThreadPoolExecutor(max_workers=es.get_lowest_data_node_thread_count(
            client_config)) as executor:
        for index in indices:
            index = str(index['index'])
            # Only proceed if index is not a special index
            if not es.check_special_index(index):
                executor.submit(forcemerge_indices, client_config, index,
                                index_forcemerge_policies)
    elastic_connection.close()
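
The same bounded fan-out pattern in isolation, with a stand-in worker function and a fixed worker count in place of es.get_lowest_data_node_thread_count:

from concurrent.futures import ThreadPoolExecutor

def process_index(index):
    # Stand-in for forcemerge_indices(client_config, index, policies)
    print("Processing " + index)

indices = ["logs-2021.01.01", "logs-2021.01.02", "logs-2021.01.03"]
with ThreadPoolExecutor(max_workers=2) as executor:
    for index in indices:
        executor.submit(process_index, index)
# Exiting the with-block waits for all submitted tasks to finish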
Example #11
def take_snapshot(client_config, repository, snapshot, body):
    """[summary]
    Creates a backup snapshot

    Args:
        client_config ([dict]): [Client configuration]
        repository ([str]): [ES/OS repository name]
        snapshot ([str]): [Name of snapshot to create]
        body ([dict]): [Details for backup job]

    Raises:
        Exception: [If error, print error and retry]

    Returns:
        [bool]: [Backup job status]
    """
    try:
        if es.check_cluster_health_status(client_config, 'yellow'):
            print("Cluster health check passed")
    except Exception as e:
        raise Exception(e)

    try:
        current_date = datetime.strftime(
            datetime.utcnow(), '%Y-%m-%d_%H:%M:%S')
        snapshot_name = f"{snapshot}_{current_date}"
        if DEBUG_ENABLED == "1":
            print(f"Triggering backup for {snapshot_name}*")
            print("Repository is " + repository +
                  "| snapshot is " + snapshot_name + " | body is:")
            print(json.dumps(body))
        elastic_connection = es.build_es_connection(client_config)
        backup_job = elastic_connection.snapshot.create(
            repository, snapshot_name, body, wait_for_completion=False, request_timeout=30)
        elastic_connection.close()
        if 'accepted' in backup_job:
            if backup_job['accepted']:
                return True
            else:
                print("Backup snapshot " + snapshot_name + " failed to create")
                return False

    except Exception as e:
        elastic_connection.close()
        print("Operation failed - Create snapshot " +
              snapshot + " for repo " + repository)
        raise Exception(e)
Example #12
def get_indices_within_limit_age(client_config, indices, limit_age):
    """[summary]
    Takes a list of indices and looks to see if the most recent document
    is within a specified @timestamp age based on limit_age

    Args:
        client_config ([dict]): [Client configuration]
        indices ([list]): [List of indices to look through]
        limit_age ([int]): [Age in terms of within X days ago]

    Raises:
        Exception: [If error, print and retry]

    Returns:
        [list]: [List of indices that were within limit_age]
    """
    limit_age = limit_age * 86400
    current_date = datetime.utcnow()
    indices_within_limit_age = []
    body = '{"aggs": {"indices": {"terms": {"field": "_index","order": {"1": "desc"},"size": 50000},"aggs": {"1": {"max": {"field": "@timestamp"}}}}},"size": 0,"_source": {"excludes": []}}'

    for index in indices:
        elastic_connection = es.build_es_connection(client_config)
        try:
            if DEBUG_ENABLED == "1":
                print("Index is " + index)
                print(f"Limit age is {limit_age}\nBody is\n{body}")
            result = elastic_connection.search(index=index + "*", body=body)
            elastic_connection.close()
            if DEBUG_ENABLED == "1":
                print(result)
            for bucket in result['aggregations']['indices']['buckets']:
                index_name = bucket['key']
                index_date = dateparser.parse(
                    bucket['1']['value_as_string']).replace(tzinfo=None)
                seconds_ago = (current_date - index_date).total_seconds()
                if DEBUG_ENABLED == "1":
                    print(f"Index name is {index_name}")
                    print(f"Policy {limit_age} vs index {seconds_ago}")
                if seconds_ago <= limit_age:
                    indices_within_limit_age.append(index_name)
        except Exception as e:
            elastic_connection.close()
            raise Exception(e)
    return indices_within_limit_age
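
The search body above is a terms aggregation on _index with a max sub-aggregation on @timestamp; the same body expressed as a dict, which can be easier to maintain than the embedded JSON string:

body = {
    "size": 0,
    "_source": {"excludes": []},
    "aggs": {
        "indices": {
            "terms": {"field": "_index", "order": {"1": "desc"}, "size": 50000},
            "aggs": {"1": {"max": {"field": "@timestamp"}}}
        }
    }
}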
Example #13
def apply_retention_to_old_indices(indices, index_retention_policies,
                                   client_config):
    """Apply retention to indices older than policy

    Args:
        indices (array): List of indices
        index_retention_policies (dict): Retention policy
        client_config (dict): Client configuration
    """
    elastic_connection = es.build_es_connection(client_config)
    with ThreadPoolExecutor(max_workers=es.get_lowest_data_node_thread_count(
            client_config)) as executor:
        for index in indices:
            index = str(index['index'])
            # Only proceed if index is not a special index
            if not es.check_special_index(index):
                executor.submit(delete_old_indices, client_config, index,
                                index_retention_policies)
    elastic_connection.close()
Example #14
def create_index_template(client_config, group, last_index):
    # Base template settings
    template = {
        "order": 5,
        "version": 60001,
        "settings": {
            "index": {
                "mapping": {
                    "total_fields": {
                    "limit": "15000"
                    }
                },
                "refresh_interval": "30s",
                "number_of_shards": "1",
                "number_of_replicas": "1"
            }
        },
        "mappings": {
        },
        "aliases": {}
    }

    try:
        elastic_connection = es.build_es_connection(client_config)
        # Get index mappings from most current index
        field_mappings = elastic_connection.indices.get_mapping(last_index)
        # Extract mappings from most current index
        mapping = field_mappings[last_index]['mappings']
        # Update base template to have index mappings
        template['mappings'] = mapping
        # Set index patterns template should match on
        template['index_patterns'] = [group + "-*"]
        # Create the template
        elastic_connection.indices.put_template(group, body=template)
        elastic_connection.close()
        return True
    except Exception:
        return False
Example #15
def calculate_accounting(client_config, client_name):
    settings = load_settings()
    # Set today's current datetime
    today = datetime.now()
    date_time = today.strftime("%Y%m%d")
    # Check if client accounting data already calculated today
    if path.exists(settings['accounting']['output_folder'] + '/' + client_name + "_accounting-" + date_time + ".json"):
        print("Accounting already calculated for " + client_name + " today: " + str(date_time))
        return True
    else:
        print("Calculating accounting data for " + client_name)
        # Check cluster health - Expect Yellow to continue
        if es.check_cluster_health_status(client_config, settings['accounting']['health_check_level']):
            elastic_connection = es.build_es_connection(client_config)
            # Grab the client specific allocation policy (tiering policy)
            index_allocation_policies = get_allocation_policy(client_config)

            # Next, get information on all current indices in client cluster
            indices = es.es_get_indices(client_config)
            print("Client " + client_name + " has " + str(len(indices)) + ' indices')

            accounting_records = []
            special_index_size = 0
            # Loop through each index
            for index in indices:
                if not es.check_special_index(index['index']):
                    # Grab the current index's allocation policy based on index name
                    policy = es.check_index_allocation_policy(index['index'], index_allocation_policies)
                    # Lookup the policy's # of days setting
                    policy_days = index_allocation_policies[policy]

                    # Get current datetime
                    current_date = datetime.now()
                    # Get index datetime
                    index_date = datetime.strptime(index['creation.date.string'], '%Y-%m-%dT%H:%M:%S.%fZ')
                    # Figure out how many days since current_date vs. index_date
                    days_ago = (current_date - index_date).days
                    
                    # Build client specific daily accounting records
                    # Convert index size from bytes to gigabytes
                    index_size_in_gb = round(float(index['storeSize']) / 1024 / 1024 / 1024, 8)
                    # Calculate indices daily cost
                    # If index is older than policy_days, set disk type to sata
                    # and make sure index is set to proper allocation attribute
                    if days_ago >= policy_days:
                        cost = round(float(index_size_in_gb) * settings['accounting']['sata_cost'], 8)
                        disk_type = 'sata'
                    else:
                        cost = round(float(index_size_in_gb) * settings['accounting']['ssd_cost'], 8)
                        disk_type = 'ssd'
                    index_group = es.get_index_group(index['index'])
                    accounting_record = {
                        'name': index['index'],
                        'client': client_name,
                        'size': float(index_size_in_gb),
                        'logs': int(index['docsCount']),
                        'disk': disk_type,
                        'cost': float(cost),
                        'index_creation_date': index['creation.date.string'],
                        '@timestamp': str(current_date.isoformat()),
                        'index_group': index_group,
                        'allocation_policy': str(policy),
                        'current_policy_days': int(policy_days)
                    }
                    accounting_records.append(accounting_record)
                else:
                    index_size_in_gb = round(float(index['storeSize']) / 1024 / 1024 / 1024, 8)
                    special_index_size += index_size_in_gb
            # Check TOML for device tracking settings, if exists, calculate
            if 'device_tracking_inclusion' in settings['accounting']:
                device_by_ip = []
                device_by_computer_name = []
                device_by_user = []
                total_devices = 0
                for inclusion in settings['accounting']['device_tracking_inclusion']:
                    index = settings['accounting']['device_tracking_inclusion'][inclusion]['index']
                    tracking_field = settings['accounting']['device_tracking_inclusion'][inclusion]['tracking_field']
                    search = settings['accounting']['device_tracking_inclusion'][inclusion]['search']
                    count_as = settings['accounting']['device_tracking_inclusion'][inclusion]['count_as']
                    
                    response = es.aggregate_search(elastic_connection, index, search, 'value_count', tracking_field, sort='@timestamp', limit_to_fields=[tracking_field])
                    if count_as == "computer":
                        device_by_computer_name += response
                    if count_as == "ip":
                        device_by_ip += response
                    if count_as == "user":
                        device_by_user += response
            if 'device_tracking_exclusion' in settings['accounting']:
                for exclusion in settings['accounting']['device_tracking_exclusion']:
                    index = settings['accounting']['device_tracking_exclusion'][exclusion]['index']
                    field_to_exclude_against = settings['accounting']['device_tracking_exclusion'][exclusion]['field_to_exclude_against']
                    field_to_match_against = settings['accounting']['device_tracking_exclusion'][exclusion]['field_to_match_against']
                    field_to_match_against_count_as_type = settings['accounting']['device_tracking_exclusion'][exclusion]['field_to_match_against_count_as_type']
                    search = settings['accounting']['device_tracking_exclusion'][exclusion]['search']
                    count_as = settings['accounting']['device_tracking_exclusion'][exclusion]['count_as']
                    response = es.multiple_aggregate_search(elastic_connection, index, search, 'value_count', field_to_match_against, field_to_exclude_against, sort='@timestamp', limit_to_fields=[field_to_exclude_against,field_to_match_against])

                    if field_to_match_against_count_as_type == "computer":
                        # Look for computers in device_by_computer_name, if found
                        # remove response value from field_to_exclude_against
                        for computer in response.keys():
                            if computer in device_by_computer_name:
                                print(f"Removing {computer} from {field_to_exclude_against}")
                                exclusion = response[computer]
                                if field_to_exclude_against == "ip":
                                    device_by_ip.pop(exclusion)
                                if field_to_exclude_against == "computer":
                                    device_by_computer_name.pop(exclusion)
                                if field_to_exclude_against == "user":
                                    device_by_user.pop(exclusion)
                    if field_to_match_against_count_as_type == "ip":
                        # Look for ips in device_by_ip, if found
                        # remove response value from field_to_exclude_against
                        for ip in response.keys():
                            print(ip)
                            if ip in device_by_ip:
                                print(f"Removing {ip} from {field_to_exclude_against}")
                                exclusion = response[ip]
                                if field_to_exclude_against == "ip":
                                    device_by_ip.pop(exclusion)
                                if field_to_exclude_against == "computer":
                                    device_by_computer_name.pop(exclusion)
                                if field_to_exclude_against == "user":
                                    device_by_user.pop(exclusion)
                    if field_to_match_against_count_as_type == "user":
                        # Look for users in device_by_user, if found
                        # remove response value from field_to_exclude_against
                        for user in response.keys():
                            if user in device_by_user:
                                print(f"Removing {user} from {field_to_exclude_against}")
                                exclusion = response[user]
                                if field_to_exclude_against == "ip":
                                    device_by_ip.pop(exclusion)
                                if field_to_exclude_against == "computer":
                                    device_by_computer_name.pop(exclusion)
                                if field_to_exclude_against == "user":
                                    device_by_user.pop(exclusion)
                device_by_user_count = len(set(device_by_user))
                device_by_computer_name_count = len(set(device_by_computer_name))
                device_by_ip_count = len(set(device_by_ip))
                total_devices = device_by_user_count + device_by_computer_name_count + device_by_ip_count
                accounting_record = {
                        'client': client_name,
                        'device_count': int(total_devices),
                        '@timestamp': str(current_date.isoformat()),
                    }
                if os.path.isdir(settings['accounting']['output_folder']):
                    with open(settings['accounting']['output_folder'] + '/' + client_name + "_accounting-device-" + date_time + ".json", 'a') as f:
                        json_content = json.dumps(accounting_record)
                        f.write(json_content)
                        f.write('\n')
                else:
                    print(f"{settings['accounting']['output_folder']} does not exist. Unable to write accounting records to disk")
            # Appends newest record date into accounting_record
            #for accounting_record in accounting_records:
                #accounting_record['newest_document_date'] = str(es.get_newest_document_date_in_index(client_config, index['index'], elastic_connection).isoformat())
            if not settings['settings']['debug'] and len(accounting_records) != 0:
                for accounting_record in accounting_records:
                    # Create a backup copy of each accounting record
                    if os.path.isdir(settings['accounting']['output_folder']):
                        with open(settings['accounting']['output_folder'] + '/' + client_name + "_accounting-" + date_time + ".json", 'a') as f:
                            json_content = json.dumps(accounting_record)
                            f.write(json_content)
                            f.write('\n')
                    else:
                        print(f"{settings['accounting']['output_folder']} does not exist. Unable to write accounting records to disk")
            else:
                print("Debug enabled or no data to save. Not creating accounting file")

            elastic_connection.close()

            cluster_stats = es.get_cluster_stats(client_config)
            # Convert cluster size from bytes to gigabytes
            cluster_size = round(float(cluster_stats['indices']['store']['size_in_bytes']) / 1024 / 1024 / 1024, 8)
            print("Total cluster size is: " + str(cluster_size) + " GB")
            if 'device_tracking_inclusion' in settings['accounting']:
                print(f"Total device tracking is {total_devices}")

            if cluster_size > 1:
                if os.path.isdir(settings['accounting']['output_folder']) and len(accounting_records) != 0 and not settings['settings']['debug']:
                    with open(settings['accounting']['output_folder'] + '/' + client_name + "_accounting-" + date_time + ".json") as f:
                        accounting_file = f.readlines()
                    total_accounting_size = 0
                    for record in accounting_file:
                        json_object = json.loads(record)
                        total_accounting_size += float(json_object['size'])
                    total_accounting_size = round(total_accounting_size, 8)
                    print("Total accounting record size is: " + str(total_accounting_size) + " GB")

                    special_index_size = round(special_index_size, 2)
                    print("Total special index size is : " + str(special_index_size) + " GB")

                    total_accounting_index_size = special_index_size + total_accounting_size
                    print("Accounting and special index size equals : " + str(total_accounting_index_size) + " GB")

                    difference_size = cluster_size - total_accounting_index_size
                    print("Difference is " + str(difference_size) + " GB")
                    if difference_size >= 20:
                        message = "Accounting verification is off by more than 20.0 GB. Please find out why. This test is performed by comparing the current cluster size against the records in the accounting JSON output files.\n\nTotal cluster size is : " + str(cluster_size) + " GB\n\nTotal accounting record size is: " + str(total_accounting_size) + " GB\n\nTotal special index size is : " + str(special_index_size) + " GB\n\nAccounting and special index size equals : " + str(total_accounting_index_size) + " GB\n\nDifference is " + str(difference_size) + " GB\n\nThe size difference can be due to the script taking longer to run and the index sizes growing during the accounting calculation. However, if the difference is significant, some other problem likely occurred."
                        send_notification(client_config, "accounting verification", "Failed", message, teams=settings['accounting']['ms-teams'], jira=settings['accounting']['jira'])
                else:
                    if not os.path.isdir(settings['accounting']['output_folder']):
                        print(f"{settings['accounting']['output_folder']} does not exist. Unable to write accounting records to disk")
                    if len(accounting_records) == 0:
                        print("No accounting records to write to disk. Empty cluster")
                

                if len(accounting_records) != 0 and not settings['settings']['debug'] and settings['accounting']['output_to_es']:
                    print("Sending accounting records to ES")
                    elasticsearch_connection = es.build_es_connection(client_config)
                    results = es.get_list_by_chunk_size(accounting_records, 100)
                    for result in results:
                        es.bulk_insert_data_to_es(elasticsearch_connection, result, "accounting", bulk_size=100)
                    elasticsearch_connection.close()
                    clients = load_configs()
                    if client_name != settings['accounting']['send_copy_to_client_name'] and settings['accounting']['send_copy_to_client_name'] != '':
                        elasticsearch_connection = es.build_es_connection(clients[settings['accounting']['send_copy_to_client_name']])
                        results = es.get_list_by_chunk_size(accounting_records, 100)
                        for result in results:
                            es.bulk_insert_data_to_es(elasticsearch_connection, result, "accounting", bulk_size=100)
                        elasticsearch_connection.close()
                    return True
                else:
                    if not settings['settings']['debug']:
                        print("No index data found for accounting")
                        return True
                    else:
                        return True
            else:
                return True
        else:
            print("Accounting operation failed for " + client_name + ". Cluster health does not meet level: " + settings['accounting']['health_check_level'])
            return False
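
The per-index cost calculation above in isolation, with illustrative byte counts and per-GB rates standing in for settings['accounting']['ssd_cost'] and ['sata_cost']:

store_size_bytes = 53687091200          # example index storeSize in bytes (50 GB)
ssd_cost_per_gb = 0.03                  # illustrative rate
sata_cost_per_gb = 0.01                 # illustrative rate
days_ago, policy_days = 45, 30          # example index age vs. allocation policy days

index_size_in_gb = round(float(store_size_bytes) / 1024 / 1024 / 1024, 8)
if days_ago >= policy_days:
    cost = round(index_size_in_gb * sata_cost_per_gb, 8)
    disk_type = 'sata'
else:
    cost = round(index_size_in_gb * ssd_cost_per_gb, 8)
    disk_type = 'ssd'
print(disk_type, index_size_in_gb, cost)  # -> sata 50.0 0.5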
Example #16
def extrapolate_files(files):
    # Find missing accounting files and extrapolate their values
    print("Client has " + str(len(files)) + " accounting files")
    files.sort()
    newest_file = files[len(files) - 1]
    oldest_file = files[0]
    newest_file_date = convert_file_date(os.path.basename(newest_file), client)
    oldest_file_date = convert_file_date(os.path.basename(oldest_file), client)
    days = (newest_file_date - oldest_file_date).days
    if int(days) + 1 == len(files):
        print("PASS - Client " + client + " should have " +
              str(int(days) + 1) + " of accounting data. " + str(len(files)) +
              " found")
    else:
        print("FAIL - Client " + client + " should have " +
              str(int(days) + 1) + " of accounting data. " + str(len(files)) +
              " found")
        json_data = load_json_file(oldest_file)
        sizes = {}
        #growth_rates = {}
        while days > 0:
            oldest_file_date += timedelta(days=1)
            days = days - 1
            file_date = datetime.strftime(oldest_file_date, '%Y%m%d')
            file_to_check = "/cloud/cloud_configs/business_functions/accounting/" + client + "_accounting-" + file_date + ".json"
            if os.path.exists(file_to_check):
                json_data = load_json_file(file_to_check)
                for record in json_data:
                    index_group = record['index_group']
                    #if index_group not in growth_rates:
                    #    growth_rates[index_group] = []
                    if index_group not in sizes:
                        sizes[index_group] = []
                    if record['size'] > 0:
                        sizes[index_group].append(record['size'])
                    #if len(sizes[index_group]) > 1:
                    #    percent_difference = (sizes[index_group][-1] - sizes[index_group][-2]) / sizes[index_group][-1]
                    #    # Wait for at least 10 values before calculating IQR
                    #    if len(growth_rates[index_group]) > 10:
                    #        q1 = np.quantile(growth_rates[index_group],0.30)
                    #        q3 = np.quantile(growth_rates[index_group],0.70)
                    #        # Only save values above the 30% standard deviation but below 70%
                    #        # and change is not double in size
                    #        if growth_rates[index_group][-1] > q1 and growth_rates[index_group][-1] < q3 and abs(percent_difference) <= 100:
                    #            growth_rates[index_group].append(percent_difference)
                    #    else:
                    #        # Ignore change rates above 100 as that is extreme daily growth
                    #        # Often caused during rollout of new data sources or agents
                    #        if abs(percent_difference) <= 100:
                    #            growth_rates[index_group].append(percent_difference)
            else:
                print("File " + file_to_check + " not found")
                mean = 0
                es_data = []
                for record in json_data:
                    index_group = record['index_group']
                    if len(sizes[index_group]) != 0:
                        # Only apply moving averages if there are at least 5 stored
                        # growth rates or more
                        if len(sizes[index_group]) >= 5:
                            # Apply Exponential Moving Averages to smooth the data set
                            df = pd.DataFrame({'data': sizes[index_group]})
                            df['ewm_alpha_1'] = df['data'].ewm(
                                span=7, adjust=False).mean()
                            # Store the latest moving average
                            mean = df['ewm_alpha_1'].iloc[-1]
                        else:
                            # Not enough values to safely predict moving average
                            # set mean to 0 for no growth
                            mean = 0
                    if debug == 1:
                        print(index_group + " previous size " +
                              str(round(record['size'], 2)) + " and cost " +
                              str(round(record['cost'], 2)) +
                              " new moving average is " + str(mean))
                    if mean >= 0:
                        record['size'] = round(mean, 2)
                    else:
                        record['size'] = 0
                    if record['disk'] == 'ssd':
                        record['cost'] = round(record['size'] * .001, 2)
                    if record['disk'] == 'sata':
                        record['cost'] = round(record['size'] * .003, 2)
                    record['@timestamp'] = str(oldest_file_date.isoformat())
                    es_data.append(record)
                    if debug == 1:
                        print("New size " + str(record['size']) +
                              " and cost " + str(record['cost']))
                    # Create a backup copy of each accounting record
                    if debug == 0:
                        with open(file_to_check, 'a') as f:
                            json_content = json.dumps(record)
                            f.write(json_content)
                            f.write('\n')
                    else:
                        pass
                if debug == 0:
                    if len(es_data) != 0 and debug == 0:
                        elasticsearch_connection = es.build_es_connection(
                            clients[client])
                        results = es.get_list_by_chunk_size(es_data, 100)
                        for result in results:
                            es.bulk_insert_data_to_es(elasticsearch_connection,
                                                      result,
                                                      "accounting",
                                                      bulk_size=100)
                        elasticsearch_connection.close()
                        elasticsearch_connection = es.build_es_connection(
                            clients["ha"])
                        results = es.get_list_by_chunk_size(es_data, 100)
                        for result in results:
                            es.bulk_insert_data_to_es(elasticsearch_connection,
                                                      result,
                                                      "accounting",
                                                      bulk_size=100)
                        elasticsearch_connection.close()
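
The gap-filling above estimates a missing day's size from an exponential moving average of prior sizes; a self-contained pandas sketch with example daily values:

import pandas as pd

sizes = [10.2, 10.8, 11.1, 11.0, 11.6, 12.1]   # example daily index-group sizes in GB
df = pd.DataFrame({'data': sizes})
# Exponentially weighted mean over a 7-day span smooths day-to-day noise
df['ewm_alpha_1'] = df['data'].ewm(span=7, adjust=False).mean()
estimated_size = df['ewm_alpha_1'].iloc[-1]
print(round(estimated_size, 2))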
Example #17
def calculate_audit_trail(client_config, settings):
    """Calculates an audit trail record for MSSP clients

    Args:
        client_config (dict): Client configuration file loaded from json content
        settings (dict): Settings loaded from settings.toml
    """
    es_connection = es.build_es_connection(client_config)
    response = es.run_search(es_connection,
                             'reflex-organizations',
                             '_exists_:name',
                             sort="created_at",
                             limit_to_fields=['name', 'uuid'],
                             size=10000)
    organizations = es.return_field_mapped_to_value_from_query(
        response, 'name', 'uuid')
    start_time = get_start_time()
    end_time = get_end_time()
    for client_name, value in settings['mssp']['clients'].items():
        tenant_name = settings['mssp']['clients'][client_name]['tenant_name']
        print(
            f"Processing tenant for {client_name} with tenant name of {tenant_name}"
        )
        email = value['email']
        uuid = organizations[tenant_name]
        get_audit_trail(es_connection,
                        start_time,
                        uuid,
                        "severity",
                        end_time=end_time)
        report = f"Audit Trail Report for {tenant_name}\r\n\r\n"
        report = report + "This report represents events reviewed by H and A Security Solutions" \
            + " LLC during daily event review activities. The report includes a breakdown of" \
            + " events that have completed review.\r\n\r\nEvent statuses:\r\n\r\n"
        total_events = 0
        dismissed_events = get_audit_trail(es_connection,
                                           start_time,
                                           uuid,
                                           "dismiss_reason.keyword",
                                           end_time=end_time)
        for name, _ in dismissed_events.items():
            if valid_uuid(name):
                dismiss_reason = "Other"
            else:
                dismiss_reason = name
            report = report + \
                f"{dismiss_reason} - Number of events {dismissed_events[name]}\r\n"
            total_events = total_events + dismissed_events[name]
        report = report + \
            f"\r\nTotal Events Reviewed: {total_events}\r\n\r\nTop Rules\r\n\r\n"
        total_events = 0
        top_rules = get_audit_trail(es_connection,
                                    start_time,
                                    uuid,
                                    "title",
                                    end_time=end_time)
        for rule, _ in top_rules.items():
            report = report + \
                f"{rule} - Number of events: {top_rules[rule]}\r\n"
        top_sources = get_audit_trail(es_connection,
                                      start_time,
                                      uuid,
                                      "source.keyword",
                                      end_time=end_time)
        report = report + "\r\nTop Sources\r\n\r\n"
        for source, _ in top_sources.items():
            report = report + \
                f"{source} - Number of events {top_sources[source]}\r\n"

        for address in list(email):
            send_email(address, "Audit Trail Report", report)
        print(report)