Example #1
def apply_backup_retention_policies(client_config, job, retention, repository):
    """[summary]
    Deletes snapshots older than backup policy retention

    Args:
        client_config ([dict]): [Client configuration]
        job ([str]): [Name of index to process such as winlogbeat]
        retention ([int]): [How many days to retain snapshot]
        repository ([str]): [Name of backup repository]
    """
    snapshots = get_snapshots_in_repository(client_config, repository)
    for snapshot in snapshots['snapshots']:
        snapshot_info = build_snapshot_info(snapshot)
        if snapshot_info['short_name'] == job:
            if DEBUG_ENABLED == "1":
                print("Snapshot " + snapshot_info['name'] + " is " + str(
                    snapshot_info['days_ago']) + " days old compared to policy of " + str(retention))
            # Check if days_ago is greater than or equal to policy date
            # If greater than or equal to policy date, delete snapshot
            if 'days_ago' in snapshot_info:
                if snapshot_info['days_ago'] >= retention:
                    print("Attempting to delete snapshot " +
                          snapshot_info['name'])
                    # Delete old snapshot
                    if not delete_snapshot_in_repository(client_config, repository, snapshot_info['name']):
                        # Should not hit this point unless retry failed for an hour
                        message = "Backup snapshot removal failed for " + \
                            client_config['client_name'] + " for " + \
                            job + " in repository " + repository
                        print(message)
                        send_notification(client_config, "backup", "Failed", message,
                                          teams=settings['backup']['ms-teams'], jira=settings['backup']['jira'])
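The helper `build_snapshot_info` is not included in this excerpt. A minimal sketch of what it might return, assuming the records come from the Elasticsearch snapshot API (which exposes `snapshot` and `start_time_in_millis` fields) and that snapshot names begin with the job name; the output keys are inferred from how the function above uses them:

from datetime import datetime

def build_snapshot_info(snapshot):
    # Hypothetical reconstruction: derive the fields consumed above
    # ('name', 'short_name', 'days_ago') from a raw snapshot record.
    name = snapshot['snapshot']
    # Assumes snapshot names start with the job name, e.g. "winlogbeat-2021.06.01"
    short_name = name.split('-')[0].split('_')[0]
    info = {'name': name, 'short_name': short_name}
    if 'start_time_in_millis' in snapshot:
        start_time = datetime.fromtimestamp(snapshot['start_time_in_millis'] / 1000)
        info['days_ago'] = (datetime.now() - start_time).days
    return info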
Example #2
def delete_index(client_config, index):
    # Track the indices being processed so the failure notification can reference them
    indices = str(index)
    try:
        # Start connection to Elasticsearch
        es = build_es_connection(client_config)
        # Check if index is a single string or a list of indices
        if isinstance(index, str):
            indices = index
            # Delete the index
            status = es.indices.delete(index=index)
            get_index_operation_message(indices, "delete", status, client_config)
        elif isinstance(index, list):
            # Convert list into chunks of 50
            # This will create a list of lists up to 50 indices per list
            chunks = get_list_by_chunk_size(index, 50)
            for chunk in chunks:
                indices = ",".join(chunk)
                # Delete the group of indices
                status = es.indices.delete(index=indices)
                get_index_operation_message(indices, "delete", status, client_config)
        # Close Elasticsearch connection
        es.close()
    except Exception:
        e = sys.exc_info()
        print("Deletion job failed")
        settings = load_settings()
        send_notification(client_config, "retention", "Failed",
                          "Deletion job failed for indices " + str(indices),
                          teams=settings['retention']['ms-teams'],
                          jira=settings['retention']['jira'])
        print(e)
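`get_list_by_chunk_size` is referenced above but not shown. A minimal sketch of the chunking behaviour the deletion loop relies on, splitting a list into sub-lists of at most `chunk_size` entries:

def get_list_by_chunk_size(original_list, chunk_size):
    # e.g. 120 indices with chunk_size=50 -> sub-lists of 50, 50 and 20 entries
    return [original_list[i:i + chunk_size]
            for i in range(0, len(original_list), chunk_size)]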
Example #3
def fix_mapping_conflicts(manual_client):
    settings = load_settings()
    retry_count = 60
    sleep_time = 60
    success = 0
    if "fixmapping" in settings:
        if "enabled" in settings:
            fixmapping_enabled = settings['fixmapping']['enabled']
        else:
            fixmapping_enabled = True
    else:
        fixmapping_enabled = True
    if fixmapping_enabled:
        # Load all client configurations from /opt/maintenance/*.json
        clients = load_configs()
        # Loop through each client to perform fix mapping per client
        for client in clients:
            # Set nice variable names
            client_name = clients[client]['client_name']
            client_config = clients[client]
            # If client set at command line only run it otherwise
            # execute for all clients
            if manual_client == "" or client_name == manual_client:
                print("Processing fix mappings for " + client_name)
                if settings['settings']['limit_to_client'] == client or settings['settings']['limit_to_client'] == "":
                    while retry_count >= 0 and success == 0:
                        indices = es.es_get_indices(client_config)
                        index_groups = {}
                        for index in indices:
                            # Do not mess with special indices
                            if not es.check_special_index(index['index']):
                                index_group = es.get_index_group(index['index'])
                                if index_group not in index_groups:
                                    index_groups[index_group] = []
                                index_groups[index_group].append(index['index'])

                        for group in index_groups:
                            indices = index_groups[group]
                            indices.sort()
                            last_index = indices[-1]
                            if get_index_template(client_config, group) == "Not found":
                                print("Missing index template for " + str(group) + " - creating one off highest index number")
                                create_index_template(client_config, group, last_index)
                            # TESTING
                            template = get_index_template(client_config, group)
                            template_mappings = template[group]['mappings']['properties']
                            if group == "logstash-proofpoint":
                                check_for_mapping_conflicts(client_config, index_groups[group], template_mappings)

                        success = 1
                    else:
                        if retry_count == 0:
                            message = "Fix mapping operation failed.\n\nIt is also possible that connections are unable to be made to the client/nginx node. Please fix.\n\nRemember that in order for client's to be properly build you will need to get their cluster status to **Green** or **Yellow** and then re-run the following command:\n\n**python3 /opt/elastic-ilm/fix_mapping.py --client " + client_name + "**"
                            send_notification(client_config, "fixmapping", "Failed", message, teams=settings['fixmapping']['ms-teams'], jira=settings['fixmapping']['jira'])
                    if success == 0:
                        # Decrease retry count by one before trying while statement again
                        retry_count = retry_count - 1
                        print("Retry attempts left for fix mapping operation set to " + str(retry_count) + " sleeping for " + str(sleep_time) + " seconds")
                        time.sleep(sleep_time)
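The grouping logic depends on `es.get_index_group`, which is also not part of this excerpt. A plausible sketch, assuming index names carry a trailing rollover counter or date suffix that should be stripped to obtain the group name:

import re

def get_index_group(index_name):
    # Sketch: drop a trailing rollover counter or date suffix so that
    # "logstash-proofpoint-2021.06.01" and "logstash-proofpoint-000001"
    # both map to the group "logstash-proofpoint".
    return re.sub(r'-(\d{4}\.\d{2}\.\d{2}|\d+)$', '', index_name)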
Example #4
def apply_forcemerge_policies(manual_client=""):
    settings = load_settings()
    retry_count = 60
    sleep_time = 60
    success = 0
    if "forcemerge" in settings:
        if "enabled" in settings:
            forcemerge_enabled = settings['forcemerge']['enabled']
        else:
            forcemerge_enabled = True
    else:
        forcemerge_enabled = True
    if forcemerge_enabled:
        # Load all client configurations from /opt/maintenance/*.json
        clients = load_configs()
        # Loop through each client to perform forcemerge per client
        for client in clients:
            # Set nice variable names
            client_name = clients[client]['client_name']
            print("Processing forcemerge for " + client_name)
            client_config = clients[client]
            # If client set at command line only run it otherwise
            # execute for all clients
            if manual_client == "" or client_name == manual_client:
                if settings['settings'][
                        'limit_to_client'] == client or settings['settings'][
                            'limit_to_client'] == "":
                    while retry_count >= 0 and success == 0:
                        # Grab the client's forcemerge policies
                        index_forcemerge_policies = get_forcemerge_policy(
                            client_config)
                        # Next, get information on all current indices in cluster
                        indices = es.es_get_indices(client_config)
                        # Get the list of indices that are older than the forcemerge policy
                        apply_forcemerge_to_indices(indices,
                                                    index_forcemerge_policies,
                                                    client_config)
                        success = 1
                    else:
                        if retry_count == 0:
                            message = "forcemerge operation failed.\n\nIt is also possible that connections are unable to be made to the client/nginx node. Please fix.\n\nRemember that in order for client's to be properly build you will need to get their cluster status to **Green** or **Yellow** and then re-run the following command:\n\n**python3 /opt/elastic-ilm/forcemerge.py --client " + client_name + "**"
                            send_notification(
                                client_config,
                                "forcemerge",
                                "Failed",
                                message,
                                teams=settings['forcemerge']['ms-teams'],
                                jira=settings['forcemerge']['jira'])
                    if success == 0:
                        # Decrease retry count by one before trying while statement again
                        retry_count = retry_count - 1
                        print(
                            "Retry attempts left for forcemerge operation set to "
                            + str(retry_count) + " sleeping for " +
                            str(sleep_time) + " seconds")
                        time.sleep(sleep_time)
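`apply_forcemerge_to_indices` is not shown here. As a rough sketch of the idea rather than the project's actual implementation, assuming the policy is a mapping of index group to a minimum age in days and reusing the `creation.date.string` field seen in the accounting example:

from datetime import datetime

def apply_forcemerge_to_indices(indices, index_forcemerge_policies, client_config):
    # Sketch only: force-merge indices older than their group's policy age (in days).
    connection = es.build_es_connection(client_config)
    for index in indices:
        group = es.get_index_group(index['index'])
        if group in index_forcemerge_policies:
            # Assumed policy format: {"winlogbeat": 7, ...} (days)
            index_date = datetime.strptime(index['creation.date.string'],
                                           '%Y-%m-%dT%H:%M:%S.%fZ')
            days_ago = (datetime.now() - index_date).days
            if days_ago >= index_forcemerge_policies[group]:
                connection.indices.forcemerge(index=index['index'], max_num_segments=1)
    connection.close()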
Example #5
def run_accounting(manual_client=""):
    settings = load_settings()
    if settings['accounting']['enabled']:
        retry_count = settings['accounting']['retry_attempts']
        initial_retry_count = retry_count
        retry_list = []
        sleep_time = settings['accounting']['retry_wait_in_seconds']
        # Load all client configurations
        clients = load_configs()
        # Add all clients initially to retry_list for first run
        for client in clients:
            # If client set at command line only run it otherwise
            # execute for all clients
            if manual_client == "" or clients[client]['client_name'] == manual_client:
                retry_list.append(clients[client]['client_name'])
        # Loop through each client to perform accounting per client
        while retry_count >= 0 and len(retry_list) > 0:
            print("Accounting job processing for:")
            print(retry_list)
            if initial_retry_count != retry_count:
                print("Retry count set to " + str(retry_count))
                print("------------------------------\n")
            for client in clients:
                # Set nice variable names
                client_name = clients[client]['client_name']
                if client_name in retry_list:
                    client_config = clients[client]
                    if retry_count == 0:
                        # If on the last attempt, accept a health level of yellow
                        message = "Accounting operation failed.\n\nDue to failing 10 times, the health level was set to " + settings['accounting']['fallback_health_check_level'] + " and ran for client " + clients[client]['client_name'] + ". \n\nThis is not optimal. Please check to see if data should be purged and re-inserted with a green cluster."
                        send_notification(clients[client], "accounting", "Failed", message, teams=settings['accounting']['ms-teams'], jira=settings['accounting']['jira'])
                    # If client set at command line only run it otherwise
                    # execute for all clients
                    if manual_client == "" or client_name == manual_client:
                        # Trigger calculate accounting process
                        result = calculate_accounting(client_config, client_name)
                        if result:
                            # Remove successful client from retry_list
                            retry_list.remove(clients[client]['client_name'])
                        else:
                            print("Client " + client_name + " did not process correctly.")
                            if retry_count == 0:
                                if notification:
                                    message = "Accounting operation failed.\n\nIt is also possible that connections are unable to be made to the client/nginx node. Please fix.\n\nRemember that in order for client's to be properly build you will need to get their cluster status to **Green** and then re-run the following command:\n\npython3 /opt/cloud_operations/accounting.py --client " + client_name + "\n\nIf a green cluster is not possible by end of day, please run the following command to force run with a different color cluster:\n\npython3 /opt/cloud_operations/accounting.py --client " + client_name + " --health yellow"
                                    send_notification(client_config, "accounting", "Failed", message, jira=settings['accounting']['ms-teams'], teams=settings['accounting']['jira'])
            # Lower the retry_count by 1
            retry_count = retry_count - 1
            if retry_count >= 0 and len(retry_list) > 0:
                print("The below client(s) failed to process. Retry necessary:")
                print(retry_list)
                print("Retry count set to " + str(retry_count) + " sleeping for " + str(sleep_time) + " seconds")
                time.sleep(sleep_time)
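All of these jobs share `load_settings()`. The accounting-related keys this function reads can be summarised as the equivalent Python dictionary below; the values shown are placeholders, not the project's defaults:

example_settings = {
    'accounting': {
        'enabled': True,
        'retry_attempts': 10,               # how many passes over retry_list
        'retry_wait_in_seconds': 60,        # sleep between passes
        'fallback_health_check_level': 'yellow',
        'ms-teams': True,                   # notification channels passed to send_notification
        'jira': True,
    }
}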
Example #6
def take_snapshot_per_policies(client_config, job, backup_policy, repository, include_special=False):
    # Global backup grabs all indices
    if job == 'global':
        indices = es.get_write_alias_names(client_config)
        if 'limit_age' in backup_policy:
            indices = get_indices_within_limit_age(
                client_config, indices, backup_policy['limit_age'])
            index = modify_indices_to_string(
                indices) + "," + modify_indices_to_string(special_indices_to_backup, wildcard=True)
        else:
            index = '*'
    # .kibana backs up global state and .kibana indices
    elif job == '.kibana' or job == "special":
        index = modify_indices_to_string(
            special_indices_to_backup, wildcard=True)
    else:
        if 'limit_age' in backup_policy:
            backup_policy = {"retention": 1, "limit_age": 1}
            indices = get_indices_within_limit_age(
                client_config, [job], backup_policy['limit_age'])
            if include_special:
                index = modify_indices_to_string(
                    indices) + "," + modify_indices_to_string(special_indices_to_backup, wildcard=True)
            else:
                index = modify_indices_to_string(indices)
        else:
            index = str(job) + "*"

    body = {
        "indices": index,
        "ignore_unavailable": True,
        "include_global_state": True,
        "metadata": {
            "taken_by": "Elastic-ILM",
            "taken_because": "Scheduled backup per policy"
        }
    }
    if backup_policy['retention'] != 0:
        backup_job = take_snapshot(client_config, repository, job, body)
        if backup_job:
            print(f"Backup for {job} completed successfully")
        else:
            # Should not hit this point unless retry failed for an hour
            message = "Backup take snapshot failed for " + \
                client_config['client_name'] + " for " + \
                job + " in repository " + repository
            print(message)
            send_notification(client_config, "backup", "Failed", message,
                              teams=settings['backup']['ms-teams'], jira=settings['backup']['jira'])
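`modify_indices_to_string` is not included. A minimal sketch consistent with how it is called above, joining an index list into the comma-separated string the snapshot body expects, with an optional wildcard appended to each entry:

def modify_indices_to_string(indices, wildcard=False):
    # Sketch: ["winlogbeat", ".kibana"] -> "winlogbeat,.kibana"
    # or, with wildcard=True, "winlogbeat*,.kibana*".
    suffix = "*" if wildcard else ""
    return ",".join(str(index) + suffix for index in indices)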
Example #7
def rollover_client_indicies(client_config):
    settings = load_settings()
    # Get the rollover policy for the client
    index_rollover_policies = get_rollover_policy(client_config)
    retry_count = 60
    sleep_time = 60
    success = 0
    while retry_count >= 0 and success == 0:
        # Check cluster health - Expect Yellow to continue
        if es.check_cluster_health_status(
                client_config, settings['rollover']['health_check_level']):
            # Get current aliases members
            aliases = es.get_all_index_aliases(client_config)
            with ThreadPoolExecutor(
                    max_workers=es.get_lowest_data_node_thread_count(
                        client_config)) as executor:
                # Apply rollover to aliases
                for alias in aliases:
                    executor.submit(apply_rollover_policy_to_alias,
                                    client_config, alias,
                                    index_rollover_policies)
            success = 1
        else:
            if retry_count > 0:
                print("Rollover operation failed for " +
                      client_config['client_name'] +
                      ". Cluster health does not meet level:  " +
                      settings['rollover']['health_check_level'])
            else:
                message = "Rollover operation failed.\n\nIt is also possible that connections are unable to be made to the client/nginx node. Please fix.\n\nRemember that in order for client's to be properly build you will need to get their cluster status to **Green** or **Yellow** and then re-run the following command:\n\n**python3 /opt/elastic-ilm/rollover.py --client " + client_config[
                    'client_name'] + "**"
                send_notification(client_config,
                                  "rollover",
                                  "Failed",
                                  message,
                                  teams=settings['rollover']['ms-teams'],
                                  jira=settings['rollover']['jira'])
        if success == 0:
            # Decrease retry count by one before trying while statement again
            retry_count = retry_count - 1
            print("Retry attempts left for rollover operation set to " +
                  str(retry_count) + " sleeping for " + str(sleep_time) +
                  " seconds")
            time.sleep(sleep_time)
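`es.check_cluster_health_status` gates several of these jobs. A rough sketch of that check, assuming it wraps the standard cluster health API and treats green as satisfying a yellow requirement (not necessarily the project's exact logic), and reusing the project's `build_es_connection` helper:

def check_cluster_health_status(client_config, required_level):
    # Sketch: accept any status at or above the required level
    # (green satisfies a yellow requirement, but not the reverse).
    ranking = {'red': 0, 'yellow': 1, 'green': 2}
    connection = build_es_connection(client_config)
    try:
        status = connection.cluster.health()['status']
    finally:
        connection.close()
    return ranking.get(status, 0) >= ranking.get(required_level, 2)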
Example #8
def get_index_operation_message(index, operation, status, client_config):
    if check_acknowledged_true(status):
        print(operation.capitalize() + " successful for " + index)
        return True
    else:
        print(operation.capitalize() + " failed for " + index + " with a status of\n\n:" + str(status))
        settings = load_settings()
        if operation == "delete":
            policy = 'retention'
        if operation == "rollover":
            policy = 'rollover'
        if operation == 'forcemerge':
            policy = 'rollover'
        # Set fallback policy for notification settings
        if operation != 'delete' and operation != 'rollover' and operation != 'forcemerge':
            policy = 'retention'
        
        send_notification(client_config, operation.capitalize(), operation.capitalize() + " Failure", operation.capitalize() + " failed for " + index + " with a status of\n\n:" + str(status), teams=settings[policy]['ms-teams'], jira=settings[policy]['jira'])
        return False
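`check_acknowledged_true` presumably inspects the API response for the standard `acknowledged` flag; a minimal sketch under that assumption:

def check_acknowledged_true(status):
    # Sketch: Elasticsearch management APIs typically return
    # {"acknowledged": true, ...} on success.
    return isinstance(status, dict) and status.get('acknowledged', False) is True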
Example #9
def calculate_accounting(client_config, client_name):
    settings = load_settings()
    # Set today's current datetime
    today = datetime.now()
    date_time = today.strftime("%Y%m%d")
    # Check if client accounting data already calculated today
    if path.exists(settings['accounting']['output_folder'] + '/' + client_name + "_accounting-" + date_time + ".json"):
        print("Accounting already calculated for " + client_name + " today: " + str(date_time))
        return True
    else:
        print("Calculating accounting data for " + client_name)
        # Check cluster health - Expect Yellow to continue
        if es.check_cluster_health_status(client_config, settings['accounting']['health_check_level']):
            elastic_connection = es.build_es_connection(client_config)
            # Grab the client specific allocation policy (tiering policy)
            index_allocation_policies = get_allocation_policy(client_config)

            # Next, get information on all current indices in client cluster
            indices = es.es_get_indices(client_config)
            print("Client " + client_name + " has " + str(len(indices)) + ' indices')

            accounting_records = []
            special_index_size = 0
            # Loop through each index
            for index in indices:
                if not es.check_special_index(index['index']):
                    # Grab the current index's allocation policy based on index name
                    policy = es.check_index_allocation_policy(index['index'], index_allocation_policies)
                    # Lookup the policy's # of days setting
                    policy_days = index_allocation_policies[policy]

                    # Get current datetime
                    current_date = datetime.now()
                    # Get index datetime
                    index_date = datetime.strptime(index['creation.date.string'], '%Y-%m-%dT%H:%M:%S.%fZ')
                    # Figure out how many days since current_date vs. index_date
                    days_ago = (current_date - index_date).days
                    
                    # Build client specific daily accounting records
                    # Convert index size from bytes to gigabytes
                    index_size_in_gb = round(float(index['storeSize']) / 1024 / 1024 / 1024, 8)
                    # Calculate indices daily cost
                    # If index is older than policy_days, set disk type to sata
                    # and make sure index is set to proper allocation attribute
                    if days_ago >= policy_days:
                        cost = round(float(index_size_in_gb) * settings['accounting']['sata_cost'], 8)
                        disk_type = 'sata'
                    else:
                        cost = round(float(index_size_in_gb) * settings['accounting']['ssd_cost'], 8)
                        disk_type = 'ssd'
                    index_group = es.get_index_group(index['index'])
                    accounting_record = {
                        'name': index['index'],
                        'client': client_name,
                        'size': float(index_size_in_gb),
                        'logs': int(index['docsCount']),
                        'disk': disk_type,
                        'cost': float(cost),
                        'index_creation_date': index['creation.date.string'],
                        '@timestamp': str(current_date.isoformat()),
                        'index_group': index_group,
                        'allocation_policy': str(policy),
                        'current_policy_days': int(policy_days)
                    }
                    accounting_records.append(accounting_record)
                else:
                    index_size_in_gb = round(float(index['storeSize']) / 1024 / 1024 / 1024, 8)
                    special_index_size += index_size_in_gb
            # Check TOML for device tracking settings, if exists, calculate
            if 'device_tracking_inclusion' in settings['accounting']:
                device_by_ip = []
                device_by_computer_name = []
                device_by_user = []
                total_devices = 0
                for inclusion in settings['accounting']['device_tracking_inclusion']:
                    index = settings['accounting']['device_tracking_inclusion'][inclusion]['index']
                    tracking_field = settings['accounting']['device_tracking_inclusion'][inclusion]['tracking_field']
                    search = settings['accounting']['device_tracking_inclusion'][inclusion]['search']
                    count_as = settings['accounting']['device_tracking_inclusion'][inclusion]['count_as']
                    
                    response = es.aggregate_search(elastic_connection, index, search, 'value_count', tracking_field, sort='@timestamp', limit_to_fields=[tracking_field])
                    if count_as == "computer":
                        device_by_computer_name += response
                    if count_as == "ip":
                        device_by_ip += response
                    if count_as == "user":
                        device_by_user += response
            if 'device_tracking_exclusion' in settings['accounting']:
                for exclusion in settings['accounting']['device_tracking_exclusion']:
                    index = settings['accounting']['device_tracking_exclusion'][exclusion]['index']
                    field_to_exclude_against = settings['accounting']['device_tracking_exclusion'][exclusion]['field_to_exclude_against']
                    field_to_match_against = settings['accounting']['device_tracking_exclusion'][exclusion]['field_to_match_against']
                    field_to_match_against_count_as_type = settings['accounting']['device_tracking_exclusion'][exclusion]['field_to_match_against_count_as_type']
                    search = settings['accounting']['device_tracking_exclusion'][exclusion]['search']
                    count_as = settings['accounting']['device_tracking_exclusion'][exclusion]['count_as']
                    response = es.multiple_aggregate_search(elastic_connection, index, search, 'value_count', field_to_match_against, field_to_exclude_against, sort='@timestamp', limit_to_fields=[field_to_exclude_against,field_to_match_against])

                    if field_to_match_against_count_as_type == "computer":
                        # Look for computers in device_by_computer_name, if found
                        # remove response value from field_to_exclude_against
                        for computer in response.keys():
                            if computer in device_by_computer_name:
                                print(f"Removing {computer} from {field_to_exclude_against}")
                                exclusion = response[computer]
                                if field_to_exclude_against == "ip":
                                    device_by_ip.pop(exclusion)
                                if field_to_exclude_against == "computer":
                                    device_by_computer_name.pop(exclusion)
                                if field_to_exclude_against == "user":
                                    device_by_user.pop(exclusion)
                    if field_to_match_against_count_as_type == "ip":
                        # Look for ips in device_by_ip, if found
                        # remove response value from field_to_exclude_against
                        for ip in response.keys():
                            print(ip)
                            if ip in device_by_computer_name:
                                print(f"Removing {ip} from {field_to_exclude_against}")
                                exclusion = response[ip]
                                if field_to_exclude_against == "ip":
                                    device_by_ip.pop(exclusion)
                                if field_to_exclude_against == "computer":
                                    device_by_computer_name.pop(exclusion)
                                if field_to_exclude_against == "user":
                                    device_by_user.pop(exclusion)
                    if field_to_match_against_count_as_type == "user":
                        # Look for users in device_by_user, if found
                        # remove response value from field_to_exclude_against
                        for user in response.keys():
                            if user in device_by_computer_name:
                                print(f"Removing {user} from {field_to_exclude_against}")
                                exclusion = response[user]
                                if field_to_exclude_against == "ip":
                                    device_by_ip.pop(exclusion)
                                if field_to_exclude_against == "computer":
                                    device_by_computer_name.pop(exclusion)
                                if field_to_exclude_against == "user":
                                    device_by_user.pop(exclusion)
                device_by_user_count = len(set(device_by_user))
                device_by_computer_name_count = len(set(device_by_computer_name))
                device_by_ip_count = len(set(device_by_ip))
                total_devices = device_by_user_count + device_by_computer_name_count + device_by_ip_count
                accounting_record = {
                        'client': client_name,
                        'device_count': int(total_devices),
                        '@timestamp': str(current_date.isoformat()),
                    }
                if os.path.isdir(settings['accounting']['output_folder']):
                    with open(settings['accounting']['output_folder'] + '/' + client_name + "_accounting-device-" + date_time + ".json", 'a') as f:
                        json_content = json.dumps(accounting_record)
                        f.write(json_content)
                        f.write('\n')
                else:
                    print(f"{settings['accounting']['output_folder']} does not exist. Unable to write accounting records to disk")
            # Appends newest record date into accounting_record
            #for accounting_record in accounting_records:
                #accounting_record['newest_document_date'] = str(es.get_newest_document_date_in_index(client_config, index['index'], elastic_connection).isoformat())
            if not settings['settings']['debug'] and len(accounting_records) != 0:
                for accounting_record in accounting_records:
                    # Create a backup copy of each accounting record
                    if os.path.isdir(settings['accounting']['output_folder']):
                        with open(settings['accounting']['output_folder'] + '/' + client_name + "_accounting-" + date_time + ".json", 'a') as f:
                            json_content = json.dumps(accounting_record)
                            f.write(json_content)
                            f.write('\n')
                    else:
                        print(f"{settings['accounting']['output_folder']} does not exist. Unable to write accounting records to disk")
            else:
                print("Debug enabled or no data to save. Not creating accounting file")

            elastic_connection.close()

            cluster_stats = es.get_cluster_stats(client_config)
            # Convert cluster size from bytes to gigabytes
            cluster_size = round(float(cluster_stats['indices']['store']['size_in_bytes']) / 1024 / 1024 / 1024, 8)
            print("Total cluster size is: " + str(cluster_size) + " GB")
            if 'device_tracking_inclusion' in settings['accounting']:
                print(f"Total device tracking is {total_devices}")

            if cluster_size > 1:
                if os.path.isdir(settings['accounting']['output_folder']) and len(accounting_records) != 0 and not settings['settings']['debug']:
                    with open(settings['accounting']['output_folder'] + '/' + client_name + "_accounting-" + date_time + ".json") as f:
                        accounting_file = f.readlines()
                    total_accounting_size = 0
                    for record in accounting_file:
                        json_object = json.loads(record)
                        total_accounting_size += float(json_object['size'])
                    total_accounting_size = round(total_accounting_size, 8)
                    print("Total accounting record size is: " + str(total_accounting_size) + " GB")

                    special_index_size = round(special_index_size, 2)
                    print("Total special index size is : " + str(special_index_size) + " GB")

                    total_accounting_index_size = special_index_size + total_accounting_size
                    print("Accounting and special index size equals : " + str(total_accounting_index_size) + " GB")

                    difference_size = cluster_size - total_accounting_index_size
                    print("Difference is " + str(difference_size) + " GB")
                    if difference_size >= 20:
                        message = "Accounting verification is off by more than 20.0 GB. Please find out why. This test is performed by comparing the current cluster size against the records in the accounting JSON output files.\n\nTotal cluster size is : " + str(cluster_size) + " GB\n\nTotal accounting record size is: " + str(total_accounting_size) + " GB\n\nTotal special index size is : " + str(special_index_size) + " GB\n\nAccounting and special index size equals : " + str(total_accounting_index_size) + " GB\n\nDifference is " + str(difference_size) + " GB\n\nThe size difference can be due to the script taking longer to run and the index sizes growing during the accounting calculation. However, if the difference is significant, some other problem likely occurred."
                        send_notification(client_config, "accounting verification", "Failed", message, jira=settings['accounting']['ms-teams'], teams=settings['accounting']['jira'])
                else:
                    if not os.path.isdir(settings['accounting']['output_folder']):
                        print(f"{settings['accounting']['output_folder']} does not exist. Unable to write accounting records to disk")
                    if len(accounting_records) == 0:
                        print("No accounting records to write to disk. Empty cluster")
                

                if len(accounting_records) != 0 and not settings['settings']['debug'] and settings['accounting']['output_to_es']:
                    print("Sending accounting records to ES")
                    elasticsearch_connection = es.build_es_connection(client_config)
                    results = es.get_list_by_chunk_size(accounting_records, 100)
                    for result in results:
                        es.bulk_insert_data_to_es(elasticsearch_connection, result, "accounting", bulk_size=100)
                    elasticsearch_connection.close()
                    clients = load_configs()
                    if client_name != settings['accounting']['send_copy_to_client_name'] and settings['accounting']['send_copy_to_client_name'] != '':
                        elasticsearch_connection = es.build_es_connection(clients[settings['accounting']['send_copy_to_client_name']])
                        results = es.get_list_by_chunk_size(accounting_records, 100)
                        for result in results:
                            es.bulk_insert_data_to_es(elasticsearch_connection, result, "accounting", bulk_size=100)
                        elasticsearch_connection.close()
                    return True
                else:
                    if not settings['settings']['debug']:
                        print("No index data found for accounting")
                        return True
                    else:
                        return True
            else:
                return True
        else:
            settings = load_settings()
            print("Accounting operation failed for " + client_name + ". Cluster health does not meet level:  " + settings['accounting']['health_check_level'])
            return False
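`es.check_index_allocation_policy` maps an index name to one of the allocation (tiering) policies. The sketch below is an inference from how the result is used above, assuming policies are keyed by index-group prefix with a catch-all `global` entry; it is not the project's actual implementation:

def check_index_allocation_policy(index_name, index_allocation_policies):
    # Sketch: pick the most specific policy whose name prefixes the index,
    # falling back to an assumed catch-all "global" policy.
    best_match = 'global'
    for policy in index_allocation_policies:
        if policy != 'global' and index_name.startswith(policy):
            if best_match == 'global' or len(policy) > len(best_match):
                best_match = policy
    return best_match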
Example #10
def apply_retention_policies(manual_client=""):
    """Apply retention policies

    Args:
        manual_client (str, optional): Name of client. Defaults to "".
    """
    settings = load_settings()
    retry_count = 60
    sleep_time = 60
    success = 0
    if settings['retention']['enabled']:
        # Load all client configurations from /opt/maintenance/*.json
        clients = load_configs()
        # Loop through each client to perform retention per client
        for key, client_config in clients.items():
            # Set nice variable names
            client_name = key
            limit_to_client = settings['settings']['limit_to_client']
            print("Processing retention for " + client_name)
            # If client set at command line only run it otherwise
            # execute for all clients
            if limit_to_client == manual_client or limit_to_client == "":
                while retry_count >= 0 and success == 0:
                    # Check cluster health - Expect Yellow to continue
                    if es.check_cluster_health_status(
                            client_config,
                            settings['retention']['health_check_level']):
                        # Grab the client's retention policies
                        index_retention_policies = get_retention_policy(
                            client_config)
                        # Next, get information on all current indices in cluster
                        indices = es.es_get_indices(client_config)
                        # Get the list of indices that are older than the retention policy
                        apply_retention_to_old_indices(
                            indices, index_retention_policies, client_config)
                        success = 1
                    else:
                        if retry_count > 0:
                            print("Retention operation failed for " + client_name + \
                                ". Cluster health does not meet level:  " + \
                                settings['retention']['health_check_level'])
                        else:
                            message = "Retention operation failed.\n\n" + \
                                "It is also possible that connections are " + \
                                "unable to be made to the client/nginx node." + \
                                "Please fix.\n\nRemember that in order for " + \
                                "client's to be properly build you will need " + \
                                "to get their cluster status to **Green** " + \
                                "or **Yellow** and then re-run the following" + \
                                " command:\n\n**python3 " + \
                                "/opt/elastic-ilm/retention.py --client " + \
                                client_name + "**"
                            send_notification(
                                client_config,
                                "retention",
                                "Failed",
                                message,
                                teams=settings['retention']['ms-teams'],
                                jira=settings['retention']['jira'])
                    if success == 0:
                        # Decrease retry count by one before trying while statement again
                        retry_count = retry_count - 1
                        print("Retry attempts left for retention " + \
                            "operation set to " + str(retry_count) + \
                            " sleeping for " + str(sleep_time) + " seconds")
                        time.sleep(sleep_time)
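The failure messages throughout these jobs reference invocations such as `python3 /opt/elastic-ilm/retention.py --client <name>`. A minimal sketch of the command-line wrapper implied by that usage; the exact argument handling is an assumption:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Apply index retention policies")
    # --client limits the run to a single client; an empty value means all clients
    parser.add_argument("--client", default="", help="Only process this client")
    args = parser.parse_args()
    apply_retention_policies(manual_client=args.client)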