def main():
    parser = setupArgs()
    args = parser.parse_args()

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    opsc.waitForCluster(cname=args.clustername,
                        pause=args.pause,
                        trys=args.trys)  # Block until cluster created
    clusters = opsc.session.get(
        "{url}/api/v2/lcm/clusters/".format(url=opsc.url)).json()
    for r in clusters['results']:
        if r['name'] == args.clustername:
            cid = r['id']
    opsc.waitForNodes(numnodes=args.clustersize,
                      pause=args.pause,
                      trys=args.trys)
    if args.dclevel:
        datacenters = opsc.session.get(
            "{url}/api/v2/lcm/datacenters/".format(url=opsc.url)).json()
        for r in datacenters['results']:
            dcid = r['id']
            print "Triggering install for DC, id = {i}".format(i=dcid)
            opsc.triggerInstall(None, dcid)
    else:
        print "Triggering install for cluster, id = {i}".format(i=cid)
        opsc.triggerInstall(cid, None)
示例#2
0
def main():
    parser = setupArgs()
    args = parser.parse_args()

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    # get cluster id, assume 1 cluster
    clusterconf = opsc.session.get(
        "{url}/cluster-configs".format(url=opsc.url)).json()
    cid = clusterconf.keys()[0]
    # get all node configs
    nodes = opsc.session.get("{url}/{id}/nodes".format(url=opsc.url,
                                                       id=cid)).json()
    # loop of configs, counting nodes in each dc
    datacenters = {}
    for n in nodes:
        if n['dc'] in datacenters:
            datacenters[n['dc']] += 1
        else:
            datacenters[n['dc']] = 1
    # reuse dict for post data in REST call
    # min(3,#) handles edge case where # of nodes < 3
    for d in datacenters:
        datacenters[d] = min(3, datacenters[d])
    # keyspaces to alter
    # leaving out LocalStrategy (system & system_schema) and EverywhereStrategy (dse_system & solr_admin)
    keyspaces = [
        "system_auth", "system_distributed", "system_traces", "dse_analytics",
        "dse_security", "dse_perf", "dse_leases", "cfs_archive",
        "spark_system", "cfs", "dsefs", "OpsCenter", "HiveMetaStore"
    ]
    postdata = {
        "strategy_class": "NetworkTopologyStrategy",
        "strategy_options": datacenters,
        "durable_writes": True
    }
    rawjson = json.dumps(postdata)
    # loop over keyspaces
    print "Looping over keyspaces: {k}".format(k=keyspaces)
    print "NOTE: No response indicates success"
    # keep track of non-sucess keyspaces to skip repairing
    skip = []
    for ks in keyspaces:
        print "Calling: PUT {url}/{id}/keyspaces/{ks} with {d}".format(
            url=opsc.url, id=cid, ks=ks, d=rawjson)
        response = opsc.session.put("{url}/{id}/keyspaces/{ks}".format(
            url=opsc.url, id=cid, ks=ks),
                                    data=rawjson).json()
        print "Response: "
        if response != None:
            # add to keyspaces to skip
            skip.append(ks)
            print "Non-success for keyspace: {ks}, excluding later...".format(
                ks=ks)
            lcm.pretty(response)

    print "Calling repair on all keyspaces/nodes:"
    print "Skipping keyspaces: {s}".format(s=skip)
示例#3
0
def main():
    parser = setupArgs()
    args = parser.parse_args()

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)
    # Block until cluster created
    opsc.waitForCluster(args.clustername, args.pause, args.trys)

    clusters = opsc.session.get("{url}/api/v1/lcm/clusters/".format(url=opsc.url)).json()
    for r in clusters['results']:
        if r['name'] == args.clustername:
            cid = r['id']

    # Check if the DC --this-- node should belong to exists, if not add DC
    if opsc.checkForDC(args.dcname):
        print "Datacenter {d} exists".format(d=args.dcname)
    else:
        print "Datacenter {n} doesn't exist, creating...".format(n=args.dcname)
        opsc.addDC(args.dcname, cid)

    # kludge, assuming ony one cluster
    dcid = ""
    datacenters = opsc.session.get("{url}/api/v1/lcm/datacenters/".format(url=opsc.url)).json()
    for d in datacenters['results']:
        if d['name'] == args.dcname:
            dcid = d['id']

    # always add self to DC
    nodes = opsc.session.get("{url}/api/v1/lcm/datacenters/{dcid}/nodes/".format(url=opsc.url, dcid=dcid)).json()
    nodecount = nodes['count']
    # simple counting for node number hits a race condition... work around
    #nodename = 'node'+str(nodecount)
    # aws metadata service instance-id
    #inst = requests.get("http://169.254.169.254/latest/meta-data/instance-id").content
    nodename = 'node-'+args.nodeid
    nodeconf = json.dumps({
        'name': nodename,
        "datacenter-id": dcid,
        "rack": args.rack,
        "ssh-management-address": args.pubip,
        "listen-address": args.privip,
        "rpc-address": "0.0.0.0",
        "broadcast-address": args.pubip,
        "broadcast-rpc-address": args.pubip})
    node = opsc.session.post("{url}/api/v1/lcm/nodes/".format(url=opsc.url), data=nodeconf).json()
    print "Added node '{n}', json:".format(n=nodename)
    lcm.pretty(node)

    nodes = opsc.session.get("{url}/api/v1/lcm/datacenters/{dcid}/nodes/".format(url=opsc.url, dcid=dcid)).json()
    nodecount = nodes['count']
    print "{n} nodes in datacenter {d}".format(n=nodecount, d=dcid)
    print "Exiting addNode..."
示例#4
0
def main():
    parser = setupArgs()
    args = parser.parse_args()

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    count = 0
    while True:
        count += 1
        if count > args.trys:
            print "Maximum attempts, exiting"
            exit()
        try:
            jobs = opsc.session.get(
                "{url}/api/v2/lcm/jobs/".format(url=opsc.url)).json()
        except requests.exceptions.Timeout as e:
            print "Request {c} to OpsC timeout after initial connection, exiting.".format(
                c=count)
            exit()
        except requests.exceptions.ConnectionError as e:
            print "Request {c} to OpsC refused after initial connection, exiting.".format(
                c=count)
            exit()
        lcm.pretty(jobs)
        if jobs['count'] == 0:
            print "No jobs found on try {c}, sleeping {p} sec...".format(
                c=count, p=args.pause)
            time.sleep(args.pause)
            continue
        if runningJob(jobs):
            print "Jobs running/pending on try {c}, sleeping {p} sec...".format(
                c=count, p=args.pause)
            time.sleep(args.pause)
            continue
        if (not runningJob(jobs)) and (jobs['count'] < args.num):
            print "Jobs found on try {c} but num {j} < {n}, sleeping {p} sec...".format(
                c=count, j=jobs['count'], n=args.num, p=args.pause)
            time.sleep(args.pause)
            continue
        if (not runningJob(jobs)) and (jobs['count'] >= args.num):
            print "No jobs running/pending and num >= {n} on try {c}, exiting".format(
                n=args.num, c=count)
            break
def main():
    parser = setupArgs()
    args = parser.parse_args()

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    # get cluster id, assume 1 cluster
    clusterconf = opsc.session.get(
        "{url}/cluster-configs".format(url=opsc.url)).json()
    cid = clusterconf.keys()[0]
    # get all node configs
    nodes = opsc.session.get("{url}/{id}/nodes".format(url=opsc.url,
                                                       id=cid)).json()
    # loop of configs, counting nodes in each dc
    datacenters = {}
    for n in nodes:
        if n['dc'] in datacenters:
            datacenters[n['dc']] += 1
        else:
            datacenters[n['dc']] = 1
    # reuse dict for post data in REST call
    # min(3,#) handles edge case where # of nodes < 3
    for d in datacenters:
        datacenters[d] = min(3, datacenters[d])
    # keyspaces to alter
    # leaving out LocalStrategy (system & system_schema) and EverywhereStrategy (dse_system & solr_admin)
    keyspaces = [
        "system_auth", "system_distributed", "system_traces", "dse_analytics",
        "dse_security", "dse_perf", "dse_leases", "cfs_archive",
        "spark_system", "cfs", "dsefs", "OpsCenter", "HiveMetaStore"
    ]
    postdata = {
        "strategy_class": "NetworkTopologyStrategy",
        "strategy_options": datacenters,
        "durable_writes": True
    }
    rawjson = json.dumps(postdata)
    # loop over keyspaces
    print "Looping over keyspaces: {k}".format(k=keyspaces)
    print "NOTE: No response indicates success"
    # keep track of non-sucess keyspaces to skip repairing
    skip = []
    for ks in keyspaces:
        print "Calling: PUT {url}/{id}/keyspaces/{ks} with {d}".format(
            url=opsc.url, id=cid, ks=ks, d=rawjson)
        response = opsc.session.put("{url}/{id}/keyspaces/{ks}".format(
            url=opsc.url, id=cid, ks=ks),
                                    data=rawjson).json()
        print "Response: "
        if response != None:
            # add to keyspaces to skip
            skip.append(ks)
            print "Non-success for keyspace: {ks}, excluding later...".format(
                ks=ks)
            lcm.pretty(response)

    print "Calling repair on all keyspaces/nodes:"
    print "Skipping keyspaces: {s}".format(s=skip)

    for ks in keyspaces:
        if ks in skip:
            print "Skipping keyspace {ks}".format(ks=ks)
            continue
        print "Repairing {ks}...".format(ks=ks)
        for node in nodes:
            nodeip = str(node['node_ip'])
            print "    ...on node {n}".format(n=nodeip)
            response = opsc.session.post(
                "{url}/{id}/ops/repair/{node}/{ks}".format(url=opsc.url,
                                                           id=cid,
                                                           node=nodeip,
                                                           ks=ks),
                data='{"is_sequential": false}').json()
            print "   ", response
            running = True
            count = 0
            while (running):
                print "    Sleeping 2s after check {c}...".format(c=count)
                time.sleep(2)
                status = opsc.session.get("{url}/request/{r}/status".format(
                    url=opsc.url, r=response)).json()
                count += 1
                if (status['state'] != u'running'):
                    print "    Status of request {r} is: {s}".format(
                        r=response, s=status['state'])
                    running = False
                if (count >= 15):
                    print "    Status 'running' after {c} checks, continuing".format(
                        c=count)
                    running = False
示例#6
0
def main():
    parser = setupArgs()
    args = parser.parse_args()
    checkArgs(args)

    # Basic repo config
    dserepo = {
        "name": "DSE repo",
        "username": args.repouser,
        "password": args.repopw
    }
    if args.verbose:
        print "Default repo config:"
        tmp = dserepo.copy()
        tmp['password'] = "******"
        lcm.pretty(tmp)

    # If privkey passed read key content...
    if args.privkey != None:
        keypath = os.path.abspath(args.privkey)
        with open(keypath, 'r') as keyfile:
            privkey = keyfile.read()
        print "Will create cluster {c} on {u} with keypath {k}".format(
            c=args.clustername, u=args.opsc_ip, k=keypath)
        dsecred = {
            "become-mode": "sudo",
            "use-ssh-keys": True,
            "name": "DSE creds",
            "login-user": args.username,
            "ssh-private-key": privkey,
            "become-user": None
        }
    # ...otherwise use a pw
    else:
        print "Will create cluster {c} on {u} with password".format(
            c=args.clustername, u=args.opsc_ip)
        dsecred = {
            "become-mode": "sudo",
            "use-ssh-keys": False,
            "name": "DSE creds",
            "login-user": args.username,
            "login-password": args.password,
            "become-user": None
        }
        if args.becomepw:
            dsecred['become-password'] = args.password
    if args.verbose:
        print "Default creds:"
        tmp = dsecred.copy()
        if 'login-password' in tmp: tmp['login-password'] = "******"
        if 'become-password' in tmp: tmp['become-password'] = "******"
        if 'ssh-private-key' in tmp: tmp['ssh-private-key'] = "ZZZZZ"
        lcm.pretty(tmp)
    # Minimal config profile
    defaultconfig = {
        "name": "Default config",
        "datastax-version": args.dsever,
        "json": {
            'cassandra-yaml': {
                "authenticator":
                "com.datastax.bdp.cassandra.auth.DseAuthenticator",
                "num_tokens": 8,
                "allocate_tokens_for_local_replication_factor": 3,
                "endpoint_snitch":
                "org.apache.cassandra.locator.GossipingPropertyFileSnitch",
                "compaction_throughput_mb_per_sec": 64
            },
            "dse-yaml": {
                "authorization_options": {
                    "enabled": True
                },
                "authentication_options": {
                    "enabled": True
                },
                "dsefs_options": {
                    "enabled": True
                }
            }
        }
    }
    # Since this isn't necessarily being called on the nodes where 'datapath'
    # exists checking is pointless
    if args.datapath != None:
        print "--datapath {p} passed, setting root datapath in default config".format(
            p=args.datapath)
        defaultconfig["json"]["cassandra-yaml"]["data_file_directories"] = [
            os.path.join(args.datapath, "data")
        ]
        defaultconfig["json"]["cassandra-yaml"][
            "saved_caches_directory"] = os.path.join(args.datapath,
                                                     "saved_caches")
        defaultconfig["json"]["cassandra-yaml"][
            "commitlog_directory"] = os.path.join(args.datapath, "commitlog")
        defaultconfig["json"]["dse-yaml"]["dsefs_options"][
            "work_dir"] = os.path.join(args.datapath, "dsefs")
        defaultconfig["json"]["dse-yaml"]["dsefs_options"][
            "data_directories"] = [{
                "dir":
                os.path.join(args.datapath, "dsefs/data")
            }]
    # if --aoss option passed, enable AOSS
    if args.aoss and args.dsever.startswith('6'):
        print "--aoss passed, adding enable AOSS to default config"
        defaultconfig["json"]["dse-yaml"]["alwayson_sql_options"] = {
            "enabled": True
        }
        defaultconfig["json"]["dse-yaml"]["resource_manager_options"] = {
            "worker_options": {
                "workpools": [{
                    "memory": "0.4",
                    "cores": "0.4",
                    "name": "alwayson_sql"
                }]
            }
        }
    elif args.aoss and args.dsever.startswith('5'):
        print "WARNING: --aoss passed and DSE version <6, ignoring --aoss"
    # if nojava option passed, disable java/jce
    if args.nojava:
        print "--nojava passed, adding disable java to default config"
        defaultconfig["json"]["java-setup"] = {}
        defaultconfig["json"]["java-setup"]["manage-java"] = False

    # Overriding all config profile logic above
    # Todo, read config json from a file or http endpoint
    if args.config != None:
        print "WARNING: --config passed, OVERRIDING ALL OTHER config arguments"
        print "WARNING: Failed install job possible, e.g. if config json data"
        print "WARNING: paths don't match existing disks/paths"
        defaultconfig = json.loads(args.config)

    if args.verbose:
        print "Default config profile:"
        lcm.pretty(defaultconfig)

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    # Return config instead of bool?
    # This check is here to allow calling script from node instances if desired.
    # Ie script may be called multiple times.
    # Cluster doesn't esist -> must be 1st node -> do setup
    c = opsc.checkForCluster(args.clustername)
    if not c:
        print "Cluster {n} doesn't exist, creating...".format(
            n=args.clustername)
        cred = opsc.addCred(json.dumps(dsecred))
        repo = opsc.addRepo(json.dumps(dserepo))
        conf = opsc.addConfig(json.dumps(defaultconfig))
        cid = opsc.addCluster(args.clustername, cred['id'], repo['id'],
                              conf['id'], args.dbpasswd)
    else:
        print "Cluster {n} exists, exiting...".format(n=args.clustername)
示例#7
0
def main():
    parser = setupArgs()
    args = parser.parse_args()

    print "Starting alterKeyspaces: {t}".format(t=time.ctime())
    print "Sleeping {s} sec before start...".format(s=args.delay)
    time.sleep(args.delay)
    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    # get cluster id, assume 1 cluster
    clusterconf = opsc.session.get(
        "{url}/cluster-configs".format(url=opsc.url)).json()
    if len(clusterconf.keys()) == 0:
        print "Error: no clusters, exiting."
        # exiting with 0 as to not propigate error up to deploy
        exit()
    if args.verbose:
        lcm.pretty(clusterconf)
    cid = clusterconf.keys()[0]
    # get all node configs
    nodes = opsc.session.get("{url}/{id}/nodes".format(url=opsc.url,
                                                       id=cid)).json()
    if len(nodes) == 0:
        print "Error: no nodes, exiting."
        # exiting with 0 as to not propigate error up to deploy
        exit()
    if args.verbose:
        lcm.pretty(nodes)
    # loop of configs, counting nodes in each dc
    datacenters = {}
    for n in nodes:
        if n['dc'] in datacenters:
            datacenters[n['dc']] += 1
        else:
            datacenters[n['dc']] = 1
    # reuse dict for post data in REST call
    # min(3,#) handles edge case where # of nodes < 3
    for d in datacenters:
        datacenters[d] = min(3, datacenters[d])
    # keyspaces to alter
    # leaving out LocalStrategy (system & system_schema) and EverywhereStrategy (dse_system & solr_admin)
    keyspaces = {
        "system_auth", "system_distributed", "system_traces", "dse_analytics",
        "dse_security", "dse_perf", "dse_leases", "cfs_archive",
        "spark_system", "cfs", "dsefs", "OpsCenter", "HiveMetaStore"
    }
    postdata = {
        "strategy_class": "NetworkTopologyStrategy",
        "strategy_options": datacenters,
        "durable_writes": True
    }
    rawjson = json.dumps(postdata)
    # loop over keyspaces
    print "Looping over keyspaces: {k}".format(k=', '.join(keyspaces))
    print "NOTE: No response indicates success"
    # keep track of non-sucess keyspaces to skip repairing
    skip = set()
    for ks in keyspaces:
        print "Calling: PUT {url}/{id}/keyspaces/{ks} with {d}".format(
            url=opsc.url, id=cid, ks=ks, d=rawjson)
        response = opsc.session.put("{url}/{id}/keyspaces/{ks}".format(
            url=opsc.url, id=cid, ks=ks),
                                    data=rawjson).json()
        print "Response: {r}".format(r=response)
        if response != None:
            # add to keyspaces to skip
            skip.add(ks)
            print "Non-success for keyspace: {ks}, excluding later...".format(
                ks=ks)
            lcm.pretty(response)

    print "Skipping keyspaces: {s}".format(s=', '.join(skip))
    for ks in skip:
        keyspaces.discard(ks)
    # look for version on all nodes, in case agent is down on some
    # dummy version for edge case where all agents aren't reporting, then bail
    version = '0'
    for n in nodes:
        if 'dse' in n['node_version']:
            version = n['node_version']['dse']
    if version.startswith('0'):
        print "Error: no DSE version found, exiting."
        # exiting with 0 as to not propigate error up to deploy
        exit(0)
    if version.startswith('5'):
        if args.norepair:
            print "--norepair passed, skipping repair and exiting."
            exit(0)
        print "DSE version: {v}, calling repairs".format(v=version)
        print "Running repairs"
        runRepair(opsc, cid, nodes, keyspaces)
    else:
        print "DSE version: {v}".format(v=version)
        if args.nodesync:
            # Explicitly add dse_system/solr_admin which aren't passed in because they're
            # EverywhereStrategy and therefore un-altered
            keyspaces.add("dse_system")
            keyspaces.add("solr_admin")
            # Explicitly skip system_auth and opsc KS's
            keyspaces.discard("OpsCenter")
            keyspaces.discard("system_auth")
            enableNodesync(opsc, cid, keyspaces)
            # Explicitly repair keyspaces system_auth and OpsCenter
            if args.norepair:
                print "--norepair passed, skipping repair and exiting."
                exit(0)
            runRepair(opsc, cid, nodes, {"system_auth", "OpsCenter"})
        else:
            if args.norepair:
                print "--norepair passed, skipping repair and exiting."
                exit(0)
            runRepair(opsc, cid, nodes, keyspaces)
示例#8
0
def main():
    parser = setupArgs()
    args = parser.parse_args()
    checkArgs(args)

    # Basic repo config
    dserepo = json.dumps({
        "name":"DSE repo",
        "username":args.repouser,
        "password":args.repopw})

    # If privkey passed read key content...
    if args.privkey != None:
        keypath = os.path.abspath(args.privkey)
        with open(keypath, 'r') as keyfile:
            privkey = keyfile.read()
        print "Will create cluster {c} on {u} with keypath {k}".format(c=args.clustername, u=args.opsc_ip, k=keypath)
        dsecred = json.dumps({
            "become-mode":"sudo",
            "use-ssh-keys":True,
            "name":"DSE creds",
            "login-user":args.username,
            "ssh-private-key":privkey,
            "become-user":None})
    # ...otherwise use a pw
    else:
        print "Will create cluster {c} on {u} with password".format(c=args.clustername, u=args.opsc_ip)
        dsecred = json.dumps({
            "become-mode":"sudo",
            "use-ssh-keys":False,
            "name":"DSE creds",
            "login-user":args.username,
            "login-password":args.password,
            "become-user":None})

    # Minimal config profile
    # Todo, read config json from a file
    defaultconfig = {
        "name":"Default config",
        "datastax-version": args.dsever,
        "json": {
            'cassandra-yaml': {
                "authenticator":"com.datastax.bdp.cassandra.auth.DseAuthenticator",
                "num_tokens":32,
                "endpoint_snitch":"GossipingPropertyFileSnitch"
            },
            "dse-yaml": {
                "authorization_options": {"enabled": True},
                "authentication_options": {"enabled": True}
            }
        }}
    # Since this isn't necessarily being called on the nodes where 'datapath'
    # exists checking is pointless
    if args.datapath != None:
        defaultconfig["json"]["cassandra-yaml"]["data_file_directories"] = [os.path.join(args.datapath, "data")]
        defaultconfig["json"]["cassandra-yaml"]["saved_caches_directory"] = os.path.join(args.datapath, "saved_caches")
        defaultconfig["json"]["cassandra-yaml"]["commitlog_directory"] = os.path.join(args.datapath, "commitlog")

    defaultconfig = json.dumps(defaultconfig)

    opsc = lcm.OpsCenter(args.opsc_ip, args.opscuser, args.opscpw)
    # Block waiting for OpsC to spin up, create session & login if needed
    opsc.setupSession(pause=args.pause, trys=args.trys)

    # Return config instead of bool?
    # This check is here to allow calling script from node instances if desired.
    # Ie script may be called multiple times.
    # Cluster doesn't esist -> must be 1st node -> do setup
    c = opsc.checkForCluster(args.clustername)
    if not c:
        print "Cluster {n} doesn't exist, creating...".format(n=args.clustername)
        cred = opsc.addCred(dsecred)
        repo = opsc.addRepo(dserepo)
        conf = opsc.addConfig(defaultconfig)
        cid = opsc.addCluster(args.clustername, cred['id'], repo['id'], conf['id'])
    else:
        print "Cluster {n} exists, exiting...".format(n=args.clustername)