Пример #1
0
def pinghost(host):
    """Ping ``host`` and log whether it responded.

    First tries the Windows ``ping.exe`` binary. If launching or reading it
    raises, falls back to a single-packet ``ping`` whose count flag is chosen
    from the current OS name.

    Args:
        host: Host name or address handed to the ping command.

    Returns:
        True when the host is considered online, False otherwise.
    """
    try:
        process = subprocess.Popen(["ping.exe", host],
                                   stdout=subprocess.PIPE)
        raw_output = process.communicate()[0]
        # Decode instead of str(bytes) so the text is searched as real text,
        # and lower-case it so the match does not depend on capitalisation.
        output = raw_output.decode(errors='replace').lower()
        # A non-zero exit status means ping itself reported failure (e.g. all
        # requests timed out); the text check additionally covers replies of
        # the form "Destination host unreachable".
        if process.returncode != 0 or 'unreachable' in output:
            log_error('!!!!!' + host + ' is unreachable!!!!!')
            return False
        log_info(host + ' is online')
        return True
    except Exception as error:
        # Any failure of the ping.exe path lands here; the message assumes
        # the cause is a non-Windows machine where ping.exe does not exist.
        log_warn('Current Machine is not running Windows-based OS: ' +
                 str(error))
        # Ping command count option as function of OS
        # NOTE(review): system_name / system_call are imported elsewhere;
        # presumably platform.system and subprocess.call -- confirm.
        param = '-n' if system_name().lower() == 'windows' else '-c'
        # Building the command. Ex: "ping -c 1 phlamtecdb-a"
        command = ['ping', param, '1', host]
        # Pinging: an exit status of 0 is treated as "host answered".
        if system_call(command) == 0:
            log_info(host + ' is online')
            return True
        log_error('!!!!!' + host + ' is OFFLINE!!!!!')
        return False
Пример #2
0
def healthcheck(host, port, instance_name, user, password):
    """Report whether a MySQL instance accepts a connection.

    Opens a connection with a 10 second timeout, verifies it is usable, and
    closes it again.

    Args:
        host: Server host name or address.
        port: TCP port of the instance.
        instance_name: Label used only in the log messages.
        user: Account used to connect.
        password: Password for ``user``.

    Returns:
        0 when the instance is up, -1 when connecting or the liveness check
        failed (the error is logged).
    """
    try:
        mysql_connection = mysql.connector.connect(host=host,
                                                   port=port,
                                                   user=user,
                                                   password=password,
                                                   connection_timeout=10)
        try:
            # The original discarded this result, so a connection that was
            # not usable was still reported as UP.
            alive = mysql_connection.is_connected()
        finally:
            # Always release the probe connection; it was previously leaked.
            mysql_connection.close()
        if not alive:
            raise ConnectionError('connection opened but is not usable')
        log_info(host + ' ' + instance_name + ' INSTANCE, IS UP')
        return 0
    except Exception as error:
        log_error('!!!!!' + host + ' ' + instance_name +
                  ' INSTANCE, IS DOWN!!!!!:\t' + str(error))

        return -1
Пример #3
0
    def confirm_normality(self):
        """Record a recovery message if a lock file exists, then clear locks.

        When either the danger or the warning lock file is present, a
        'good' status message stamped with the current local time is queued
        on ``self.messages``. The locks are cleared and an OK line is logged
        in every case.
        """
        was_locked = (os.path.isfile(self.DANGER_LOCK)
                      or os.path.isfile(self.WARNING_LOCK))

        if was_locked:
            recovery_notice = {
                'status': 'good',
                'short_message': 'Everything is back to normal',
                'long_message': 'Nothing to complain about.',
                'time_string': datetime.datetime.now().isoformat(),
            }
            self.messages.append(recovery_notice)

        self.clear_locks()
        log_info('Everything is OK!')
Пример #4
0
def sendnotification(config, subject):
    """Email ``subject`` (used as both subject and body) to the distro list.

    Failures are logged rather than raised, so a broken mail setup does not
    stop the caller.

    Args:
        config: Mapping providing ``distro_list`` (iterable of recipient
            addresses) and ``smtp_info`` with ``from``, ``host`` and ``port``.
        subject: Text placed in the Subject header and the message body.

    Returns:
        None.
    """
    try:
        distro_list = config['distro_list']
        smtp_info = config['smtp_info']

        msg = EmailMessage()
        msg.set_content(subject)
        msg['Subject'] = subject
        msg['From'] = smtp_info['from']
        msg['To'] = ", ".join(distro_list)

        # The context manager issues QUIT and closes the socket even when
        # send_message raises; the original leaked the session in that case.
        with smtplib.SMTP(smtp_info['host'], smtp_info['port']) as smtp:
            smtp.send_message(msg)

        log_info('Notification Sent')
    except Exception as error:
        log_error('Error sending notification!: ' + str(error))
    # No `finally: return` here: it silently discarded every in-flight
    # exception, including KeyboardInterrupt and SystemExit.
Пример #5
0
def sendnotification_replication_errors_nfixed(config, body):
    """Email the distro list that replication errors could not be resolved.

    Failures are logged rather than raised.

    Args:
        config: Mapping providing ``distro_list`` (iterable of recipient
            addresses) and ``smtp_info`` with ``from``, ``host`` and ``port``.
        body: Text appended to the fixed message preamble.

    Returns:
        None.
    """
    try:
        distro_list = config['distro_list']
        smtp_info = config['smtp_info']

        msg = EmailMessage()
        msg.set_content('Error in replication due to statement:\n\n' + body)
        # NOTE(review): the host named in the subject is the SMTP host from
        # the config, not a database host -- confirm this is intended.
        msg['Subject'] = ('Replication Error(s) on host: ' +
                          smtp_info['host'] + ': Unable to Resolve')
        msg['From'] = smtp_info['from']
        msg['To'] = ", ".join(distro_list)

        # The context manager issues QUIT and closes the socket even when
        # send_message raises; the original leaked the session in that case.
        with smtplib.SMTP(smtp_info['host'], smtp_info['port']) as smtp:
            smtp.send_message(msg)

        log_info('Notification Sent')
    except Exception as error:
        log_error('Error sending notification!: ' + str(error))
    # No `finally: return` here: it silently discarded every in-flight
    # exception, including KeyboardInterrupt and SystemExit.
Пример #6
0
    def check(self):
        """Read the slave status once and react to errors or lag.

        Connects with the instance's credentials, runs ``SHOW SLAVE STATUS``,
        logs the fields of interest and then dispatches to exactly one of
        ``raise_replication_error``, ``track_lag`` or ``confirm_normality``.
        Any exception is passed to ``self.raise_exception``. Afterwards, if
        ``self.messages`` is non-empty, notifications are triggered.
        """
        try:
            cnx = mysql.connector.connect(user=self.user,
                                          password=self.password,
                                          host=self.host,
                                          port=self.port)
            try:
                cursor = cnx.cursor()
                cursor.execute('SHOW SLAVE STATUS;')
                replication_status_row = cursor.fetchall()[0]
            finally:
                # The row is all that is needed; release the connection
                # (it was previously never closed).
                cnx.close()

            # Positional columns of the status row.
            # NOTE(review): presumably Last_Errno, Last_Error,
            # Seconds_Behind_Master and Slave_SQL_Running_State -- the
            # positions depend on the server version; confirm.
            last_error_no = replication_status_row[18]
            last_error = replication_status_row[19]
            seconds_behind_master = replication_status_row[32]
            slave_sql_running_state = replication_status_row[44]

            log_info('Last Error No: ' + str(last_error_no))
            # Fixed: this line used to log the error number a second time
            # instead of the error text.
            log_info('Last Error: ' + str(last_error))
            log_info('Seconds behind master: ' + str(seconds_behind_master))
            log_info('slave_sql_running_state: ' +
                     str(slave_sql_running_state))

            if last_error_no != 0:
                self.raise_replication_error(last_error,
                                             slave_sql_running_state)
            elif seconds_behind_master >= self.lag_interval:
                self.track_lag(slave_sql_running_state, seconds_behind_master)
            else:
                self.confirm_normality()

        except Exception as error:
            self.raise_exception(error)

        if self.messages:
            self.trigger_notifications()
Пример #7
0
    def clear_replication_errors(self):
        """Skip failing replication statements until the slave reports none.

        Logs in with the replication account from ``self.config`` and then
        repeatedly reads ``SHOW SLAVE STATUS``. While an error code is
        reported, the slave is stopped, one statement is skipped and the
        slave is started again. Once the status shows error code 0, a
        "fixed" notification carrying the collected errors is sent.

        If the login fails, or anything raises while resolving, an
        "unable to resolve" notification is sent and the method returns.
        """
        stop_slave_query = 'STOP SLAVE;'
        # Fixed: the statement used to end in ';)' which is invalid SQL.
        skip_counter_query = 'set global sql_slave_skip_counter = 1;'
        start_slave_query = 'START SLAVE;'
        slave_status_query = 'SHOW SLAVE STATUS;'
        unresolved_body = ('Potential Errors present in Replication.'
                           ' Unable to resolve issue(s).')
        total_errors = 0
        errors = dict()

        try:
            # Log in as the replication user
            log_info('Replication: Loggin in as Replication User')
            cnx = mysql.connector.connect(
                user=self.config['replication']['user'],
                password=self.config['replication']['password'],
                host=self.host,
                port=self.port)
            log_info('Replication: Successfully Logged in as Replication User')
        except Exception as error:
            log_error(
                "Replication: Error logging in as Replication User on host: " +
                self.host + ': ' + str(error))
            sendnotification_replication_errors_nfixed(self.config,
                                                       unresolved_body)
            # Without a connection nothing below can work; the original fell
            # through into the loop with `cnx` unbound.
            return

        try:
            cursor = cnx.cursor()
            # NOTE(review): the loop has no iteration cap; it relies on the
            # slave eventually reporting error code 0 or a query raising.
            while True:
                log_info('Replication: Performing \"Status\" on slave')
                # Get the status of the replication slave
                cursor.execute(slave_status_query)
                replication_status_row = cursor.fetchall()[0]
                # NOTE(review): columns 18/19 are presumably Last_Errno and
                # Last_Error -- confirm against the server version.
                replication_error_code = str(replication_status_row[18])
                replication_error_str = str(replication_status_row[19])

                # If there are errors, lets try to resolve them
                if int(replication_error_code) != 0:
                    log_error('Replication: Error detected: ' +
                              replication_error_str)
                    # Keyed by code, so a repeated code overwrites the text
                    # while total_errors still counts every occurrence.
                    errors[replication_error_code] = replication_error_str
                    total_errors = total_errors + 1

                    # lets skip the error and try to resolve the hang up
                    cursor.execute(stop_slave_query)
                    cursor.execute(skip_counter_query)
                    cursor.execute(start_slave_query)

                # Lets check for anymore errors that may be holding us up
                cursor.execute(slave_status_query)
                replication_status_row = cursor.fetchall()[0]
                replication_error_code = str(replication_status_row[18])
                if int(replication_error_code) == 0:
                    log_info("Replication: No more errors detected")
                    sendnotification_replication_errors_fixed(
                        self.config,
                        build_error_report(errors, total_errors, True),
                        replication_error_code)
                    break
        except Exception as error:
            self.raise_exception(error)
            # Fixed: `str + Exception` raised TypeError here, masking the
            # real failure and skipping the notification below.
            log_error(
                "Replication: Error preventing all replication issues to be resolve. "
                "Some or none of these issues were resolved:\t" + str(error))
            sendnotification_replication_errors_nfixed(self.config,
                                                       unresolved_body)
        finally:
            # Release the replication-user connection (previously leaked).
            cnx.close()
Пример #8
0
import os

from MySQL.health import healthcheck, pinghost
from MySQL.replication import ReplicationChecker
from Notifier.notify import sendnotification
from Utilities.utility import log_info, log_debug

if __name__ == '__main__':
    directory = os.path.realpath(
        os.path.join(os.getcwd(), os.path.dirname(__file__)))
    config = yaml.load((open(os.path.join(directory, 'config.yml'),
                             'r').read()))

    logging.basicConfig(filename=os.path.join(directory, 'replication.log'),
                        level=logging.DEBUG)
    log_info('Monitor Started')

    # Get all the instance names and their port numbers
    log_debug('Gathering instance names from config')
    instances = config['instances']
    ports = dict()
    for instance in instances:
        ports[instance] = config['mysql'][instance]

    # !Check the Instance Status'
    for port in ports:
        log_info('Performing health check on instance: ' + str(port).upper())
        status = healthcheck(config['mysql']['host'], ports.get(port), port,
                             config['mysql']['user'],
                             config['mysql']['password'])
        if status == -1: