Пример #1
0
def create_server(nova_url, image, flavor_id, network_id, session):
    """Ask Nova to boot the monitoring server and return its id.

    A ``server`` document named ``SERVER_NAME`` is POSTed to
    ``<nova_url>/servers`` with ``image`` as ``imageRef`` and ``flavor_id``
    as ``flavorRef``.  When ``network_id`` is truthy the server is attached
    to that network.

    Any request failure or unexpected response body is reported through
    ``helpers.nagios_out`` with status "Critical" and code 2.

    Returns the id read from the response, or ``None`` when the request
    failed and ``helpers.nagios_out`` handed control back to the caller.
    """
    # Bind both names up front: the POST can fail before ``response`` is
    # assigned, and the error path and the final ``return`` must still work.
    response = None
    server_id = None
    try:
        payload = {
            "server": {"name": SERVER_NAME, "imageRef": image, "flavorRef": flavor_id}
        }
        if network_id:
            payload["server"]["networks"] = [{"uuid": network_id}]
        response = session.post(nova_url + "/servers", data=json.dumps(payload))
        response.raise_for_status()
        server_id = response.json()["server"]["id"]
        helpers.debug("Creating server:%s name:%s" % (server_id, SERVER_NAME))
    except (
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
        requests.exceptions.HTTPError,
        AssertionError,
        IndexError,
        AttributeError,
        # A body that is not JSON, or lacks the "server"/"id" keys, is also a
        # creation failure rather than a reason to crash with a traceback.
        KeyError,
        ValueError,
    ) as e:
        # Only log the server's reply when there actually was one.
        if response is not None:
            helpers.debug(
                "Error from server while creating server: %s" % response.text
            )
        helpers.nagios_out(
            "Critical",
            "Could not launch server from image UUID:%s: %s"
            % (image, helpers.errmsg_from_excp(e)),
            2,
        )
    return server_id
 def delete_container(self, container_id):
     """Delete the Swift container ``container_id``.

     Issues a DELETE on ``<swift_endpoint>/<container_id>`` through
     ``self.session``.  A connection error, timeout or HTTP error status is
     logged with ``helpers.debug`` and reported through
     ``helpers.nagios_out`` with status "Critical" and code 2.
     """
     url = self.swift_endpoint + '/' + container_id
     try:
         response = self.session.delete(url)
         response.raise_for_status()
     except (requests.exceptions.ConnectionError,
             requests.exceptions.Timeout,
             requests.exceptions.HTTPError) as e:
         # Use the same exception formatter for the debug line and the
         # Nagios message, as the sibling Swift methods do.
         reason = helpers.errmsg_from_excp(e)
         helpers.debug("Error while deleting container: %s: %s" %
                       (container_id, reason))
         helpers.nagios_out(
             "Critical",
             "Could not delete the OpenStack Swift Container %s: %s" %
             (container_id, reason), 2)
    def put_object(self, container_id, object_id, data):
        """Upload ``data`` as object ``object_id`` into ``container_id``.

        Sends a PUT to ``<swift_endpoint>/<container_id>/<object_id>`` through
        ``self.session``.  A connection error, timeout or HTTP error status is
        logged with ``helpers.debug`` and reported through
        ``helpers.nagios_out`` with status "Critical" and code 2.
        """
        target = '/'.join([self.swift_endpoint, container_id, object_id])
        handled = (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.HTTPError,
        )
        try:
            self.session.put(target, data=data).raise_for_status()
        except handled as e:
            debug_line = "Error while creating object %s in container %s: %s" % (
                object_id, container_id, helpers.errmsg_from_excp(e))
            helpers.debug(debug_line)
            critical_line = "Could not create a new object file: %s: %s" % (
                object_id, helpers.errmsg_from_excp(e))
            helpers.nagios_out("Critical", critical_line, 2)
Пример #4
0
def get_endpoint_from_appdb(endpoint, appdb_endpoint):
    """Look up ``endpoint`` in AppDB and return the id of the first match.

    Queries ``<appdb_endpoint>/rest/cloud/computing/endpoints`` with a filter
    on ``endpointURL`` equal to ``endpoint`` and reads ``data[0].id`` from the
    JSON reply.

    Returns the id, or ``None`` when the reply holds no usable entry (empty
    list, missing keys, wrong shape, undecodable body).  Transport and HTTP
    errors are reported through ``helpers.nagios_out`` with status "Unknown"
    and code 3.
    """
    helpers.debug("Querying AppDB for endpoint %s" % endpoint)
    url = "/".join([appdb_endpoint, "rest/cloud/computing/endpoints"])
    params = {"filter": "endpointURL::eq:\"%s\"" % endpoint}
    try:
        r = requests.get(url,
                         params=params,
                         headers={"accept": "application/json"})
        r.raise_for_status()
        return r.json()["data"][0]["id"]
    except requests.exceptions.RequestException as e:
        msg = "Could not get info from AppDB: %s" % e
        helpers.nagios_out("Unknown", msg, 3)
    except (IndexError, KeyError, TypeError, ValueError):
        # Any reply that does not carry data[0]["id"] means "not found";
        # the caller treats None as a miss and may retry with another URL.
        return None
    return None
Пример #5
0
def wait_for_active(nova_url, server_id, vm_timeout, session):
    """Poll Nova until server ``server_id`` is ACTIVE, in ERROR, or timed out.

    The server is fetched from ``<nova_url>/servers/<server_id>`` up to
    ``vm_timeout / STATUS_SLEEP_TIME`` times, sleeping ``STATUS_SLEEP_TIME``
    seconds between polls.

    Returns ``True`` as soon as the status contains "ACTIVE" and ``False``
    when it contains "ERROR" (the ``fault`` entry, if any, is logged).

    A failing poll is tolerated only during the first three iterations;
    afterwards it is reported through ``helpers.nagios_out`` as "Critical"
    (code 2).  Exhausting all polls is reported the same way, and ``False``
    is returned if ``helpers.nagios_out`` hands control back.
    """
    i, tss = 0, 3
    helpers.debug("Check server status every %ds: " % (STATUS_SLEEP_TIME))
    while i < vm_timeout / STATUS_SLEEP_TIME:
        # server status
        try:
            response = session.get(nova_url + "/servers/%s" % (server_id))
            response.raise_for_status()
            # Parse the body once and reuse it for both status and fault.
            server = response.json()["server"]
            status = server["status"]
            helpers.debug(status, False)
            if "ACTIVE" in status:
                return True
            if "ERROR" in status:
                helpers.debug("Error from nova: %s" % server.get("fault", ""))
                return False
        except (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.HTTPError,
            AssertionError,
            IndexError,
            AttributeError,
            # Malformed or non-JSON bodies count as a failed poll too.
            KeyError,
            ValueError,
        ) as e:
            if i < tss:
                helpers.debug(
                    "Try to fetch server:%s status one more time. Error was %s\n"
                    % (server_id, helpers.errmsg_from_excp(e))
                )
                helpers.debug("Check server status every %ds: " % (STATUS_SLEEP_TIME))
            else:
                helpers.nagios_out(
                    "Critical",
                    "could not fetch server:%s status: %s"
                    % (server_id, helpers.errmsg_from_excp(e)),
                    2,
                )
        # Wait after every inconclusive poll, including failed ones, so that
        # retries are spaced out instead of being fired back-to-back.
        time.sleep(STATUS_SLEEP_TIME)
        i += 1

    # The loop has no ``break``: reaching this point means every poll was
    # used up without seeing ACTIVE or ERROR.
    helpers.nagios_out(
        "Critical",
        "could not create server:%s, timeout:%d exceeded" % (server_id, vm_timeout),
        2,
    )
    return False
    def get_object(self, container_id, object_id):
        """Fetch object ``object_id`` from ``container_id`` and check it is non-empty.

        Sends a GET to ``<swift_endpoint>/<container_id>/<object_id>`` through
        ``self.session``.  A connection error, timeout, HTTP error status or
        an empty response body is logged with ``helpers.debug`` and reported
        through ``helpers.nagios_out`` with status "Critical" and code 2.

        Nothing is returned; the content is only validated.
        """
        url = self.swift_endpoint + '/' + container_id + '/' + object_id
        try:
            response = self.session.get(url)
            response.raise_for_status()

            # Explicit check rather than ``assert``: assert statements are
            # removed under ``python -O``, which would silently skip the
            # empty-body validation.  The exception type and (empty) message
            # are kept so the handler below reports it exactly as before.
            if not response.content:
                raise AssertionError()

        except (requests.exceptions.ConnectionError,
                requests.exceptions.Timeout, requests.exceptions.HTTPError,
                AssertionError) as e:
            helpers.debug("Error while fetching object %s file: %s" %
                          (object_id, helpers.errmsg_from_excp(e)))
            helpers.nagios_out(
                "Critical", "Could not fetch object: %s: %s" %
                (object_id, helpers.errmsg_from_excp(e)), 2)
Пример #7
0
def delete_server(nova_url, server_id, session):
    """Ask Nova to delete server ``server_id``.

    Sends a DELETE to ``<nova_url>/servers/<server_id>``.  Any failure is
    reported through ``helpers.nagios_out`` with status "Critical" and
    code 2; the server's reply is logged first when one was received.
    """
    # Bind the name up front: the DELETE can fail before a response exists,
    # and the error path must not trip over an unbound local.
    response = None
    try:
        helpers.debug("Trying to delete server=%s" % server_id)
        response = session.delete(nova_url + "/servers/%s" % (server_id))
        response.raise_for_status()
    except (
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
        requests.exceptions.HTTPError,
        AssertionError,
        IndexError,
        AttributeError,
    ) as e:
        if response is not None:
            helpers.debug(
                "Error from server while deleting server: %s" % response.text
            )
        helpers.nagios_out(
            "Critical",
            "could not execute DELETE server=%s: %s"
            % (server_id, helpers.errmsg_from_excp(e)),
            2,
        )
Пример #8
0
def clean_up(nova_url, vm_timeout, session):
    """Remove servers left over from a previous probe run (best effort).

    Lists ``<nova_url>/servers`` and, for every entry named ``SERVER_NAME``,
    waits for it to disappear via ``wait_for_delete``.  If it is still there
    after the wait, it is deleted with ``delete_server`` and a "Warning"
    (code 1) is reported through ``helpers.nagios_out``.

    Failures while listing or parsing are only logged with ``helpers.debug``;
    they do not stop the probe.
    """
    try:
        response = session.get(nova_url + "/servers")
        # Turn HTTP error statuses into HTTPError so they are handled by the
        # best-effort branch below instead of failing later on a missing key.
        response.raise_for_status()
        for s in response.json()["servers"]:
            if s["name"] == SERVER_NAME:
                helpers.debug("Found old server %s, waiting for it" % s["id"])
                if not wait_for_delete(nova_url, s["id"], vm_timeout, session):
                    helpers.debug("Old server is still around after timeout, deleting")
                    delete_server(nova_url, s["id"], session)
                    helpers.nagios_out(
                        "Warning",
                        "Previous monitoring instance deleted, probe won't go on!",
                        1,
                    )
    except (
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
        requests.exceptions.HTTPError,
        AssertionError,
        IndexError,
        AttributeError,
        # Unexpected or undecodable listing bodies are equally non-fatal here.
        KeyError,
        ValueError,
    ) as e:
        helpers.debug(
            "Something went wrong while cleaning up, should be still ok: %s"
            % helpers.errmsg_from_excp(e)
        )
Пример #9
0
def wait_for_delete(nova_url, server_id, vm_timeout, session):
    """Poll Nova until server ``server_id`` is gone; return whether it is.

    Each poll lists ``<nova_url>/servers``.  When ``server_id`` is absent
    from the listing the server counts as deleted.  When it is present its
    detail record is fetched and a status starting with "DELETED" also
    counts as deleted.  A request or parsing error during a poll is treated
    as "deleted" as well.

    Returns ``True`` in those cases, ``False`` once
    ``vm_timeout / STATUS_SLEEP_TIME`` polls have passed without success.
    """
    helpers.debug("Check server %s status every %ds:" %
                  (server_id, STATUS_SLEEP_TIME))
    max_polls = vm_timeout / STATUS_SLEEP_TIME
    attempt = 0
    while attempt < max_polls:
        try:
            listing = session.get(nova_url + "/servers")
            found = False
            for entry in listing.json()["servers"]:
                if server_id != entry["id"]:
                    continue
                found = True
                detail = session.get(nova_url + "/servers/%s" % server_id)
                detail.raise_for_status()
                status = detail.json()["server"]["status"]
                helpers.debug(status, False)
                if status.startswith("DELETED"):
                    return True
            if not found:
                helpers.debug("DELETED (Not found)", False)
                return True
            time.sleep(STATUS_SLEEP_TIME)
        except (
                requests.exceptions.ConnectionError,
                requests.exceptions.Timeout,
                requests.exceptions.HTTPError,
                AssertionError,
                IndexError,
                AttributeError,
        ) as e:
            # An error while polling is interpreted as the server being gone.
            helpers.debug(
                "Could not fetch server:%s status: %s - server is DELETED" %
                (server_id, helpers.errmsg_from_excp(e)))
            return True
        attempt += 1
    return False
Пример #10
0
def main():
    """Run the Nova probe: authenticate, boot a server, wait, delete it.

    Steps, in order: parse and validate command-line arguments, authenticate
    against Keystone by trying each auth class in turn, resolve the image and
    flavor, pick a tenant-owned active network when a Neutron endpoint is
    known, clean up leftovers from earlier runs, create the server, wait for
    it to become active, delete it and wait for the deletion.

    Every outcome is reported through ``helpers.nagios_out`` (status text,
    message, numeric code).
    """
    class ArgHolder(object):
        pass

    argholder = ArgHolder()

    parser = argparse.ArgumentParser()
    parser.add_argument("--endpoint", dest="endpoint", nargs="?")
    parser.add_argument("-v", dest="verb", action="store_true")
    parser.add_argument("--flavor", dest="flavor", nargs="?")
    parser.add_argument("--image", dest="image", nargs="?")
    parser.add_argument("--cert", dest="cert", nargs="?")
    parser.add_argument("--access-token", dest="access_token", nargs="?")
    parser.add_argument("-t", dest="timeout", type=int, nargs="?", default=120)
    parser.add_argument(
        "--vm-timeout", dest="vm_timeout", type=int, nargs="?", default=300
    )
    parser.add_argument("--appdb-image", dest="appdb_img", nargs="?")
    parser.add_argument(
        "--identity-provider", dest="identity_provider", default="egi.eu", nargs="?"
    )

    parser.parse_args(namespace=argholder)
    helpers.verbose = argholder.verb

    # Plain attribute lookup; no need to build and eval() source text.
    argnotspec = [
        arg for arg in ["endpoint", "timeout"] if getattr(argholder, arg) is None
    ]

    if argholder.cert is None and argholder.access_token is None:
        helpers.nagios_out(
            "Unknown", "cert or access-token command-line arguments not specified", 3
        )

    if argholder.image is None and argholder.appdb_img is None:
        helpers.nagios_out(
            "Unknown", "image or appdb-image command-line arguments not specified", 3
        )

    if argnotspec:
        msg_error_args = "".join("%s " % (arg) for arg in argnotspec)
        helpers.nagios_out(
            "Unknown", "command-line arguments not specified, " + msg_error_args, 3
        )
    else:
        if not argholder.endpoint.startswith("http"):
            helpers.nagios_out("Unknown", "command-line arguments are not correct", 3)
        if argholder.cert and not os.path.isfile(argholder.cert):
            helpers.nagios_out("Unknown", "cert file does not exist", 3)
        if argholder.access_token and not os.path.isfile(argholder.access_token):
            helpers.nagios_out("Unknown", "access-token file does not exist", 3)

    ks_token = None
    access_token = None
    if argholder.access_token:
        # Context manager so the handle is closed even if reading fails.
        with open(argholder.access_token, "r") as access_file:
            access_token = access_file.read().rstrip("\n")

    # Try each authentication method in turn; the first that works wins.
    for auth_class in [helpers.OIDCAuth, helpers.X509V3Auth, helpers.X509V2Auth]:
        try:
            auth = auth_class(
                argholder.endpoint,
                argholder.timeout,
                access_token=access_token,
                identity_provider=argholder.identity_provider,
                userca=argholder.cert,
            )
            ks_token = auth.authenticate()
            tenant_id, nova_url, glance_url, neutron_url = auth.get_info()
            helpers.debug("Authenticated with %s" % auth_class.name)
            break
        except helpers.AuthenticationException:
            # just go ahead
            helpers.debug("Authentication with %s failed" % auth_class.name)
    else:
        # No auth class succeeded (the loop never hit ``break``).
        helpers.nagios_out("Critical", "Unable to authenticate against Keystone", 2)

    helpers.debug("Endpoint: %s" % (argholder.endpoint))
    helpers.debug("Auth token (cut to 64 chars): %.64s" % ks_token)
    helpers.debug("Project OPS, ID: %s" % tenant_id)
    helpers.debug("Nova: %s" % nova_url)
    helpers.debug("Glance: %s" % glance_url)
    helpers.debug("Neutron: %s" % neutron_url)

    # get a common session for not repeating the auth header code
    session = requests.Session()
    session.headers.update({"x-auth-token": ks_token})
    session.headers.update(
        {"content-type": "application/json", "accept": "application/json"}
    )
    # NOTE(review): requests does not appear to apply a Session-level
    # ``timeout`` attribute to outgoing calls - confirm whether per-request
    # timeouts are needed for this setting to take effect.
    session.timeout = argholder.timeout
    session.verify = True

    if not argholder.image:
        image = get_image_id(glance_url, argholder.appdb_img, session)
    else:
        image = argholder.image

    helpers.debug("Image: %s" % image)

    if not argholder.flavor:
        flavor_id = get_smaller_flavor_id(nova_url, session)
    else:
        # fetch flavor_id for given flavor (resource)
        try:
            response = session.get(nova_url + "/flavors")
            response.raise_for_status()
            flavors = response.json()["flavors"]
            flavor_id = None
            for f in flavors:
                if f["name"] == argholder.flavor:
                    flavor_id = f["id"]
            # Explicit raise instead of ``assert`` so the check survives
            # ``python -O``; type and (empty) message are unchanged.
            if flavor_id is None:
                raise AssertionError()
        except (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.HTTPError,
        ) as e:
            helpers.nagios_out(
                "Critical",
                "could not fetch flavor ID, endpoint does not correctly exposes "
                "available flavors: %s" % helpers.errmsg_from_excp(e),
                2,
            )
        except (AssertionError, IndexError, AttributeError, KeyError) as e:
            # KeyError covers a listing without the expected keys.
            helpers.nagios_out(
                "Critical",
                "could not fetch flavor ID, endpoint does not correctly exposes "
                "available flavors: %s" % str(e),
                2,
            )

    helpers.debug("Flavor ID: %s" % flavor_id)

    network_id = None
    if neutron_url:
        try:
            response = session.get(neutron_url + "/v2.0/networks")
            response.raise_for_status()
            for network in response.json()["networks"]:
                # assume first available active network owned by the tenant is ok
                if network["status"] == "ACTIVE" and network["tenant_id"] == tenant_id:
                    network_id = network["id"]
                    helpers.debug("Network id: %s" % network_id)
                    break
            else:
                helpers.debug(
                    "No tenant-owned network found, hoping VM creation will "
                    "still work..."
                )
        except (
            requests.exceptions.ConnectionError,
            requests.exceptions.Timeout,
            requests.exceptions.HTTPError,
            AssertionError,
            IndexError,
            AttributeError,
        ) as e:
            helpers.nagios_out(
                "Critical",
                "Could not get network id: %s" % helpers.errmsg_from_excp(e),
                2,
            )

    else:
        helpers.debug("Skipping network discovery as there is no neutron endpoint")

    # remove previous servers if found
    clean_up(nova_url, argholder.vm_timeout, session)

    # create server
    st = time.time()
    server_id = create_server(nova_url, image, flavor_id, network_id, session)
    server_built = wait_for_active(nova_url, server_id, argholder.vm_timeout, session)
    server_createt = round(time.time() - st, 2)

    if server_built:
        helpers.debug("\nServer created in %.2f seconds" % (server_createt))

    # server delete
    st = time.time()
    delete_server(nova_url, server_id, session)
    server_deleted = wait_for_delete(nova_url, server_id, argholder.vm_timeout, session)
    server_deletet = round(time.time() - st, 2)
    helpers.debug("\nServer=%s deleted in %.2f seconds" % (server_id, server_deletet))

    if server_built and server_deleted:
        helpers.nagios_out(
            "OK",
            "Compute instance=%s created(%.2fs) and destroyed(%.2fs)"
            % (server_id, server_createt, server_deletet),
            0,
        )
    elif server_built:
        # Built but not deleted
        helpers.nagios_out(
            "Critical",
            "Compute instance=%s created (%.2fs) but not destroyed(%.2fs)"
            % (server_id, server_createt, server_deletet),
            2,
        )
    else:
        # not built but deleted
        helpers.nagios_out(
            "Critical",
            "Compute instance=%s created with error(%.2fs) and destroyed(%.2fs)"
            % (server_id, server_createt, server_deletet),
            2,
        )
Пример #11
0
def main():
    """Run the Swift probe: create a container and object, then remove both.

    Steps, in order: parse and validate command-line arguments, authenticate
    against Keystone by trying each auth class in turn, then use a ``Swift``
    client to create a container, upload an object, fetch it, delete the
    object and delete the container.

    Every outcome is reported through ``helpers.nagios_out`` (status text,
    message, numeric code).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--endpoint",
                        dest="endpoint",
                        type=str,
                        help="The Keystone public endpoint")
    parser.add_argument("--cert",
                        dest="cert",
                        type=str,
                        help="The X.509 proxy certificate")
    parser.add_argument("--access-token",
                        dest="access_token",
                        type=str,
                        help="Access token")
    parser.add_argument(
        "-t",
        "--timeout",
        dest="timeout",
        type=int,
        default=120,
        help="The max timeout (in sec) before exiting. Default is '120'.")
    parser.add_argument("--identity-provider",
                        dest="identity_provider",
                        default="egi.eu",
                        help="Identity provider. Default is 'egi.eu'.")
    parser.add_argument("-v",
                        "--verbose",
                        dest="verbose",
                        action="store_true",
                        default=False)

    args = parser.parse_args()
    helpers.verbose = args.verbose

    argnotspec = []
    if args.endpoint is None:
        argnotspec.append("endpoint")

    if args.cert is None and args.access_token is None:
        helpers.nagios_out(
            "Unknown",
            "cert or access-token command-line arguments not specified", 3)

    if argnotspec:
        helpers.nagios_out(
            "Unknown",
            "command-line arguments not specified: " + "".join(argnotspec), 3)

    else:
        if not args.endpoint.startswith("http"):
            helpers.nagios_out("Unknown",
                               "command-line arguments are not correct", 3)

        if args.cert and not os.path.isfile(args.cert):
            helpers.nagios_out("Unknown", "cert file does not exist", 3)

        if args.access_token and not os.path.isfile(args.access_token):
            helpers.nagios_out("Unknown", "access-token file does not exist",
                               3)

    ks_token = None
    access_token = None
    if args.access_token:
        # Context manager so the handle is closed even if reading fails.
        with open(args.access_token, 'r') as access_file:
            access_token = access_file.read().rstrip('\n')

    # Try each authentication method in turn; the first that works wins.
    for auth_class in [
            helpers.OIDCAuth, helpers.X509V3Auth, helpers.X509V2Auth
    ]:
        try:
            auth = auth_class(args.endpoint,
                              args.timeout,
                              access_token=access_token,
                              identity_provider=args.identity_provider,
                              userca=args.cert)
            ks_token = auth.authenticate()
            tenant_id, swift_endpoint = auth.get_swift_endpoint()
            helpers.debug("Authenticated with %s" % auth_class.name)
            break

        except helpers.AuthenticationException:
            helpers.debug("Authentication with %s failed" % auth_class.name)

    else:
        # No auth class succeeded (the loop never hit ``break``).
        helpers.nagios_out("Critical",
                           "Unable to authenticate against Keystone", 2)

    helpers.debug("Swift public endpoint: %s" % swift_endpoint)
    helpers.debug("Auth token (cut to 64 chars): %.64s" % ks_token)
    helpers.debug("Project OPS, ID: %s" % tenant_id)

    # Random names so each probe run works on its own container and object.
    container_id = "container-" + str(uuid.uuid4())
    object_id = "file-" + str(uuid.uuid4())
    data = "This is just an ASCII file\n"

    helpers.debug(
        "Establish a connection with the OpenStack Swift Object Storage")
    session = requests.Session()
    session.headers.update({"x-auth-token": ks_token})
    session.headers.update({
        "content-type": "application/json",
        "accept": "application/json"
    })
    # NOTE(review): requests does not appear to apply a Session-level
    # ``timeout`` attribute to outgoing calls - confirm whether per-request
    # timeouts are needed for this setting to take effect.
    session.timeout = args.timeout
    session.verify = True

    _swift = Swift(swift_endpoint=swift_endpoint,
                   token=ks_token,
                   session=session)

    helpers.debug("Create a new OpenStack Swift Container: %s" % container_id)
    _swift.put_container(container_id)

    helpers.debug("Create a new object file: %s" % object_id)
    _swift.put_object(container_id, object_id, data)

    helpers.debug("Fetch the object file")
    _swift.get_object(container_id, object_id)

    helpers.debug("Delete the object file: %s" % object_id)
    _swift.delete_object(container_id, object_id)

    helpers.debug("Delete the OpenStack Swift Container %s" % container_id)
    _swift.delete_container(container_id)

    helpers.debug("Close connection with the OpenStack Swift Object Storage")
    session.close()

    helpers.nagios_out(
        "OK", "OpenStack Swift Container %s created and destroyed, "
        "object %s created and destroyed" % (container_id, object_id), 0)
Пример #12
0
def main():
    """Run the AppDB freshness probe for one cloud endpoint.

    Resolves the endpoint id in AppDB (retrying without an explicit ``:443``
    port for HTTPS URLs), fetches the endpoint record and compares each VO
    share's ``entityCreationTime`` with the current time.  Information older
    than ``--critical-treshold`` days yields "Critical" (code 2), older than
    ``--warning-treshold`` days yields "Warning" (code 1), otherwise "OK"
    (code 0).  All results are reported through ``helpers.nagios_out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-e", "--endpoint", dest="endpoint", required=True)
    parser.add_argument("-v", dest="verb", action="store_true")
    parser.add_argument("-t", dest="timeout", type=int, default=120)
    parser.add_argument("--appdb-endpoint",
                        default="http://is.marie.hellasgrid.gr")
    parser.add_argument("--warning-treshold", type=int, default=1)
    parser.add_argument("--critical-treshold", type=int, default=5)
    opts = parser.parse_args()

    if opts.verb:
        helpers.verbose = True

    endpoint_id = get_endpoint_from_appdb(opts.endpoint, opts.appdb_endpoint)
    if not endpoint_id:
        # ARGO adds the port even if it's not originally in GOC, so try to
        # find the endpoint without it if it's HTTPS/443
        parsed = urlparse(opts.endpoint)
        if parsed[0] == "https" and parsed[1].endswith(":443"):
            helpers.debug("Retry query with no port in URL")
            # parsed[1][:-4] strips the trailing ":443" from the netloc.
            new_endpoint = urlunparse((parsed[0], parsed[1][:-4], parsed[2],
                                       parsed[3], parsed[4], parsed[5]))
            endpoint_id = get_endpoint_from_appdb(new_endpoint,
                                                  opts.appdb_endpoint)

    if not endpoint_id:
        msg = "Could not get info from AppDB about endpoint %s" % opts.endpoint
        helpers.nagios_out("Critical", msg, 2)

    try:
        url = "/".join([
            opts.appdb_endpoint,
            "rest/cloud/computing/endpoints/%s" % endpoint_id
        ])
        r = requests.get(url,
                         params={
                             "limit": "0",
                             "skip": "0"
                         },
                         headers={"accept": "application/json"})
        r.raise_for_status()
        vos = r.json()["data"]["shares"]
    except requests.exceptions.RequestException as e:
        msg = "Could not get info from AppDB: %s" % e
        helpers.nagios_out("Unknown", msg, 3)
    except (IndexError, KeyError, TypeError, ValueError):
        # A reply without data/shares (missing key, wrong shape, bad JSON)
        # is reported as a Nagios status instead of an uncaught traceback.
        msg = "Could not get info from AppDB about endpoint %s" % opts.endpoint
        helpers.nagios_out("Critical", msg, 2)

    # Now check how old the information is
    # TODO: check if all the expected VOs are present
    # NOTE(review): if helpers.nagios_out terminates the process, a Warning
    # for an earlier VO is reported before a later VO that would be Critical
    # is inspected - confirm whether worst-status aggregation is wanted.
    today = datetime.today()
    for vo in vos:
        # entityCreationTime has the date where the info was produced
        # should look like "2020-12-14T10:50:56.773201"
        # will produce a Warning if the info is older than 1 day
        # or critical if older than 5 days
        # Only the first 16 characters (up to minutes) are parsed.
        updated = datetime.strptime(vo["entityCreationTime"][:16],
                                    "%Y-%m-%dT%H:%M")
        helpers.debug("VO %(VO)s updated by %(entityCreationTime)s" % vo)
        diff_days = ((today - updated).total_seconds()) / (60 * 60 * 24.)
        if diff_days > opts.critical_treshold:
            msg = ("VO %s info is older than %s days" %
                   (vo["VO"], opts.critical_treshold))
            helpers.nagios_out("Critical", msg, 2)
        elif diff_days > opts.warning_treshold:
            msg = ("VO %s info is older than %s days" %
                   (vo["VO"], opts.warning_treshold))
            helpers.nagios_out("Warning", msg, 1)

    helpers.nagios_out("OK",
                       "Endpoint publishing up to date information for VOs", 0)