示例#1
0
def Run(srvname, *a, **kw):
    """Create an AppServiceMonitor and have it monitor ``srvname``.

    If ``queue_length_max`` is supplied as a keyword argument it is removed
    from ``kw`` and passed to the ``AppServiceMonitor`` constructor instead.
    All remaining positional and keyword arguments are forwarded unchanged
    to ``monitor()``.
    """
    args = {}
    # dict.has_key() is gone in Python 3; the ``in`` operator works everywhere.
    if 'queue_length_max' in kw:
        args['queue_length_max'] = kw.pop('queue_length_max')
    AppServiceMonitor(**args).monitor(srvname, *a, **kw)
示例#2
0
def Alert(restart_list=['MEM', 'CPU'],
          alert_recipients=[nerds_email],
          alert_sender=nerds_email,
          cpu_limit=99,
          mem_limit=8,
          smtpserver=smtp_server,
          test=False):
    """Check every monitored service; restart and/or e-mail about pegged ones.

    A service needs attention when 'CPU' is in ``restart_list`` and every
    sampled CPU value is >= ``cpu_limit``, or when 'MEM' is in
    ``restart_list`` and its memory value is > ``mem_limit``.

    Services whose name matches ``newreddit<N>`` are restarted through
    ``/usr/local/bin/push -h <host> -r <N>``, and the alert mail is replaced
    by a restart report containing the command's stderr output. Mails are
    throttled per service to at most one every 300 seconds, using
    ``g.rendercache`` to remember the last send time.

    Parameters:
        restart_list: resource names ('MEM', 'CPU') that may trigger action.
            Only read, never mutated.
        alert_recipients: addresses used for the envelope and the To: header.
            Only read, never mutated.
        alert_sender: envelope sender address.
        cpu_limit: threshold every CPU sample must reach.
        mem_limit: threshold the memory value must exceed.
        smtpserver: host handed to ``smtplib.SMTP``.
        test: when true, print what would be done instead of restarting
            processes or sending mail.
    """
    # Function-scope import: the file's import block is not visible here.
    import subprocess

    restartable = re.compile(r"newreddit(\d+)")
    cache_key = 'already_alerted_'
    recipients_header = ', '.join(alert_recipients)

    for host in AppServiceMonitor(g.monitored_servers):
        for service in host:
            # cpu values; the arguments are presumably averaging windows in
            # seconds -- TODO confirm against AppServiceMonitor.
            cpu = [service.cpu(x) for x in (0, 5, 60, 300)]
            # Sample memory once so the reported value is the one compared.
            mem = service.mem()

            output = "\nCPU:   " + ' '.join("%6.2f%%" % x for x in cpu)
            output += "\nMEMORY: %6.2f%%" % mem

            service_name = "%s %s" % (host.host, service.name)

            # is this service pegged?
            mem_pegged = 'MEM' in restart_list and mem > mem_limit
            cpu_pegged = ('CPU' in restart_list
                          and all(x >= cpu_limit for x in cpu))
            if not (cpu_pegged or mem_pegged):
                continue

            mesg = ("To: " + recipients_header + "\nSubject: " +
                    service_name + " needs attention\n\n" + service_name +
                    (" is out of mem: " if mem_pegged else " is pegged:") +
                    output)

            # If we can restart this process, we do it here
            m = restartable.match(service.name)
            if m:
                proc_number = m.group(1)
                # argv list instead of a shell string: no shell quoting
                # issues with the host name.
                argv = ["/usr/local/bin/push", "-h", host.host,
                        "-r", proc_number]
                cmd = ' '.join(argv)
                if test:
                    print("would have restarted the app with command '%s'"
                          % cmd)
                else:
                    try:
                        proc = subprocess.Popen(argv,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE,
                                                stderr=subprocess.PIPE,
                                                universal_newlines=True)
                        # communicate() drains stdout as well, so a chatty
                        # command cannot block on a full pipe; only stderr
                        # is reported, as before.
                        result = proc.communicate()[1]
                    except OSError as e:
                        # Keep alerting even if the command cannot start.
                        result = "could not run '%s': %s" % (cmd, e)
                    # We override the other message to show we restarted it
                    mesg = ("To: " + recipients_header + "\n" + "Subject: " +
                            "Process " + proc_number + " on " + host.host +
                            " was automatically restarted " +
                            "due to the following:\n\n" + output + "\n\n" +
                            "Here was the output:\n" + result)
                # Set mesg = "" here to disable restart messages

            last_alerted = g.rendercache.get(cache_key + service_name) or 0
            # Truthiness check; ``is not ""`` tested identity, not content.
            if mesg:
                if test:
                    print("would have sent email\n '%s'" % mesg)
                elif time.time() - last_alerted > 300:
                    g.rendercache.set(cache_key + service_name,
                                      time.time())
                    session = smtplib.SMTP(smtpserver)
                    try:
                        session.sendmail(alert_sender, alert_recipients,
                                         mesg)
                    finally:
                        # Close the connection even if sending fails.
                        session.quit()