def run_backup_and_restore(backup_plan, restore_plan, plan_parameters):
    """Run a full backup/restore cycle against the default cluster.

    Writes test data, runs ``backup_plan``, wipes the data, runs
    ``restore_plan``, then verifies the data came back. Finishes by deleting
    the restored data so later backup tests start from an empty cluster.

    Args:
        backup_plan: name of the scheduler plan that uploads snapshots/schema.
        restore_plan: name of the scheduler plan that downloads them again.
        plan_parameters: parameters passed to both plans (credentials, bucket,
            snapshot name, keyspaces, ...).
    """
    # Write data to Cassandra with a metronome job
    launch_and_verify_job(WRITE_DATA_JOB)
    # Verify that the data was written
    launch_and_verify_job(VERIFY_DATA_JOB)

    # Run backup plan, uploading snapshots and schema to S3
    plan.start_plan(PACKAGE_NAME, backup_plan, parameters=plan_parameters)
    spin.time_wait_noisy(lambda: (
        plan.get_plan(PACKAGE_NAME, backup_plan).json()['status'] == 'COMPLETE'))

    # Delete all keyspaces and tables with a metronome job
    launch_and_verify_job(DELETE_DATA_JOB)
    # Verify that the keyspaces and tables were deleted
    launch_and_verify_job(VERIFY_DELETION_JOB)

    # Run restore plan, retrieving snapshots and schema from S3
    plan.start_plan(PACKAGE_NAME, restore_plan, parameters=plan_parameters)
    spin.time_wait_noisy(lambda: (
        plan.get_plan(PACKAGE_NAME, restore_plan).json()['status'] == 'COMPLETE'))

    # Verify that the data we wrote and then deleted has been restored
    launch_and_verify_job(VERIFY_DATA_JOB)
    # Delete data in preparation for any other backup tests
    launch_and_verify_job(DELETE_DATA_JOB)
def test_backup_and_restore_flow(): backup_parameters = { 'S3_BUCKET_NAME': os.getenv('AWS_BUCKET_NAME', 'infinity-framework-test'), 'AWS_ACCESS_KEY_ID': os.getenv('AWS_ACCESS_KEY_ID'), 'AWS_SECRET_ACCESS_KEY': os.getenv('AWS_SECRET_ACCESS_KEY'), 'AWS_REGION': os.getenv('AWS_REGION', 'us-west-2'), 'SNAPSHOT_NAME': str(uuid.uuid1()), 'CASSANDRA_KEYSPACES': '"testspace1 testspace2"', } # Write data to Cassandra with a metronome job launch_and_verify_job(WRITE_DATA_JOB) # Verify that the data was written launch_and_verify_job(VERIFY_DATA_JOB) # Run backup plan, uploading snapshots and schema to S3 plan.start_plan(PACKAGE_NAME, 'backup-s3', parameters=backup_parameters) spin.time_wait_noisy(lambda: (plan.get_plan(PACKAGE_NAME, 'backup-s3'). json()['status'] == 'COMPLETE')) # Delete all keyspaces and tables with a metronome job launch_and_verify_job(DELETE_DATA_JOB) # Verify that the keyspaces and tables were deleted launch_and_verify_job(VERIFY_DELETION_JOB) # Run restore plan, retrieving snapshots and schema from S3 plan.start_plan(PACKAGE_NAME, 'restore-s3', parameters=backup_parameters) spin.time_wait_noisy(lambda: (plan.get_plan(PACKAGE_NAME, 'restore-s3'). json()['status'] == 'COMPLETE')) # Verify that the data we wrote and then deleted has been restored launch_and_verify_job(VERIFY_DATA_JOB, expected_successes=2)
def test_all_tasks_are_launched():
    """Install without waiting for deployment, run all manual plans, then check
    that every pod task's persisted info/status agree on the task id."""
    service_options = {"service": {"yaml": "plan"}}
    sdk_install.install(
        config.PACKAGE_NAME,
        foldered_name,
        0,
        additional_options=service_options,
        wait_for_deployment=False,
        wait_for_all_conditions=True,
    )

    # Start every manual plan immediately after install returns, then wait
    # for each one to finish.
    manual_plans = ["manual-plan-0", "manual-plan-1", "manual-plan-2"]
    for plan_name in manual_plans:
        sdk_plan.start_plan(foldered_name, plan_name)
    for plan_name in manual_plans:
        sdk_plan.wait_for_completed_plan(foldered_name, plan_name)

    for pod_name in ["custom-pod-A-0", "custom-pod-B-0", "custom-pod-C-0"]:
        # /pod/<pod-id>/info fetches data from SDK's persistence layer
        pod_info = sdk_cmd.service_request(
            "GET", foldered_name, "/v1/pod/{}/info".format(pod_name)
        ).json()
        for entry in pod_info:
            info = entry["info"]
            status = entry["status"]
            # `info` is always present; `status` only exists once the task was
            # launched and mesos sent us a status update.
            if status:
                assert info["taskId"]["value"] == status["taskId"]["value"]
                assert len(info["taskId"]["value"]) > 0
            else:
                assert len(info["taskId"]["value"]) == 0
def test_repair_cleanup_plans_complete():
    """Run the 'cleanup' and 'repair' plans against a populated keyspace."""
    parameters = {'CASSANDRA_KEYSPACE': 'testspace1'}
    service_name = config.get_foldered_service_name()
    node_address = config.get_foldered_node_address()

    # populate 'testspace1' for test, then delete afterwards:
    with sdk_jobs.RunJobContext(
            before_jobs=[
                config.get_write_data_job(node_address=node_address),
                config.get_verify_data_job(node_address=node_address),
            ],
            after_jobs=[
                config.get_delete_data_job(node_address=node_address),
                config.get_verify_deletion_job(node_address=node_address),
            ]):
        for maintenance_plan in ('cleanup', 'repair'):
            sdk_plan.start_plan(service_name, maintenance_plan,
                                parameters=parameters)
            sdk_plan.wait_for_completed_plan(service_name, maintenance_plan)
def test_tls_connection(cassandra_service_tls, dcos_ca_bundle):
    """
    Tests writing, reading and deleting data over a secure TLS connection.

    Also round-trips the data through the 'backup-s3'/'restore-s3' plans, so
    AWS credentials are required (the test fails fast without them).
    """
    # Install TLS-enabled data jobs for the duration of the write/backup phase.
    with sdk_jobs.InstallJobContext([
            config.get_write_data_job(dcos_ca_bundle=dcos_ca_bundle),
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle),
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle)]):

        sdk_jobs.run_job(
            config.get_write_data_job(dcos_ca_bundle=dcos_ca_bundle))
        sdk_jobs.run_job(
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle))

        key_id = os.getenv('AWS_ACCESS_KEY_ID')
        if not key_id:
            assert False, 'AWS credentials are required for this test. ' \
                'Disable test with e.g. TEST_TYPES="sanity and not aws"'
        plan_parameters = {
            'AWS_ACCESS_KEY_ID': key_id,
            'AWS_SECRET_ACCESS_KEY': os.getenv('AWS_SECRET_ACCESS_KEY'),
            'AWS_REGION': os.getenv('AWS_REGION', 'us-west-2'),
            'S3_BUCKET_NAME': os.getenv('AWS_BUCKET_NAME', 'infinity-framework-test'),
            # uuid1 keeps the snapshot name unique per run
            'SNAPSHOT_NAME': str(uuid.uuid1()),
            'CASSANDRA_KEYSPACES': '"testspace1 testspace2"',
        }

        # Run backup plan, uploading snapshots and schema to the cloud
        sdk_plan.start_plan(
            config.SERVICE_NAME, 'backup-s3', parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, 'backup-s3')

        # Wipe the data before restoring it from the backup.
        sdk_jobs.run_job(
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle))

        # Run restore plan, downloading snapshots and schema from the cloud
        sdk_plan.start_plan(
            config.SERVICE_NAME, 'restore-s3', parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, 'restore-s3')

    # Verify the restored data over TLS, then clean up.
    with sdk_jobs.InstallJobContext([
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle),
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle)]):

        sdk_jobs.run_job(
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle))
        sdk_jobs.run_job(
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle))
def test_sidecar():
    """Run the 'sidecar' plan and check it has one two-step deploy phase."""
    sdk_plan.start_plan(config.SERVICE_NAME, "sidecar")

    started_plan = sdk_plan.get_plan(config.SERVICE_NAME, "sidecar")
    log.info(sdk_plan.plan_string("sidecar", started_plan))
    phases = started_plan["phases"]
    assert len(phases) == 1
    deploy_phase = phases[0]
    assert deploy_phase["name"] == "sidecar-deploy"
    assert len(deploy_phase["steps"]) == 2

    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, "sidecar")
def test_sidecar():
    """Kick off the 'sidecar' plan, sanity-check its shape, wait for it."""
    plan.start_plan(PACKAGE_NAME, 'sidecar')

    started_plan = plan.get_plan(PACKAGE_NAME, 'sidecar')
    sdk_utils.out("sidecar plan: " + str(started_plan))
    # Expect exactly one 'sidecar-deploy' phase containing two steps.
    phases = started_plan['phases']
    assert len(phases) == 1
    assert phases[0]['name'] == 'sidecar-deploy'
    assert len(phases[0]['steps']) == 2

    plan.wait_for_completed_plan(PACKAGE_NAME, 'sidecar')
def run_plan(plan_name, params=None):
    """Start ``plan_name``, validate its structure, and wait for completion."""
    sdk_plan.start_plan(config.SERVICE_NAME, plan_name, params)

    started_plan = sdk_plan.get_plan(config.SERVICE_NAME, plan_name)
    log.info(sdk_plan.plan_string(plan_name, started_plan))
    # One '<plan_name>-deploy' phase holding two steps is the expected layout.
    phases = started_plan["phases"]
    assert len(phases) == 1
    assert phases[0]["name"] == plan_name + "-deploy"
    assert len(phases[0]["steps"]) == 2

    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, plan_name)
def run_plan(plan_name, params=None):
    """Run ``plan_name`` with optional parameters and block until it completes."""
    sdk_plan.start_plan(config.SERVICE_NAME, plan_name, params)

    started_plan = sdk_plan.get_plan(config.SERVICE_NAME, plan_name)
    log.info("sidecar plan: " + str(started_plan))
    # Expect a single two-step '<plan_name>-deploy' phase.
    phases = started_plan['phases']
    assert len(phases) == 1
    only_phase = phases[0]
    assert only_phase['name'] == plan_name + '-deploy'
    assert len(only_phase['steps']) == 2

    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, plan_name)
def test_sidecar():
    """Start the 'sidecar' plan, verify its layout, and wait for completion."""
    sdk_plan.start_plan(config.SERVICE_NAME, "sidecar")

    started = sdk_plan.get_plan(config.SERVICE_NAME, "sidecar")
    log.info(sdk_plan.plan_string("sidecar", started))
    # Single deploy phase with exactly two steps.
    assert len(started["phases"]) == 1
    phase = started["phases"][0]
    assert phase["name"] == "sidecar-deploy"
    assert len(phase["steps"]) == 2

    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, "sidecar")
def test_repair_plan_completes():
    """Start the 'repair' plan on one keyspace and wait for COMPLETE status."""
    repair_parameters = {'CASSANDRA_KEYSPACE': 'testspace1'}
    plan.start_plan(PACKAGE_NAME, 'repair', parameters=repair_parameters)

    def repair_is_complete():
        status = plan.get_plan(PACKAGE_NAME, 'repair').json()['status']
        return status == 'COMPLETE'

    spin.time_wait_noisy(repair_is_complete)
def test_sidecar():
    """The sidecar plan should be one 'sidecar-deploy' phase with two steps."""
    sdk_plan.start_plan(config.SERVICE_NAME, 'sidecar')

    started_plan = sdk_plan.get_plan(config.SERVICE_NAME, 'sidecar')
    log.info("sidecar plan: " + str(started_plan))
    phases = started_plan['phases']
    assert len(phases) == 1
    deploy_phase = phases[0]
    assert deploy_phase['name'] == 'sidecar-deploy'
    assert len(deploy_phase['steps']) == 2

    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, 'sidecar')
def run_plan(plan_name, params=None):
    """Start the named plan, check its shape, and wait until it finishes."""
    sdk_plan.start_plan(config.SERVICE_NAME, plan_name, params)

    current = sdk_plan.get_plan(config.SERVICE_NAME, plan_name)
    log.info(sdk_plan.plan_string(plan_name, current))
    # The plan must consist of exactly one two-step '<plan_name>-deploy' phase.
    assert len(current["phases"]) == 1
    phase = current["phases"][0]
    assert phase["name"] == plan_name + "-deploy"
    assert len(phase["steps"]) == 2

    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, plan_name)
def test_sidecar():
    """Run the sidecar plan end to end, validating its single deploy phase."""
    plan_name = 'sidecar'
    sdk_plan.start_plan(config.SERVICE_NAME, plan_name)

    started_plan = sdk_plan.get_plan(config.SERVICE_NAME, plan_name)
    log.info("sidecar plan: " + str(started_plan))
    # Expect a single two-step deploy phase.
    assert len(started_plan['phases']) == 1
    phase = started_plan['phases'][0]
    assert phase['name'] == 'sidecar-deploy'
    assert len(phase['steps']) == 2

    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, plan_name)
def run_plan(plan_name, params=None):
    """Run ``plan_name`` and assert it has exactly one two-step phase."""
    sdk_plan.start_plan(config.SERVICE_NAME, plan_name, params)

    started_plan = sdk_plan.get_plan(config.SERVICE_NAME, plan_name)
    log.info("sidecar plan: " + str(started_plan))
    expected_phase_name = plan_name + '-deploy'
    phases = started_plan['phases']
    assert len(phases) == 1
    assert phases[0]['name'] == expected_phase_name
    assert len(phases[0]['steps']) == 2

    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, plan_name)
def run_plan(plan_name, params=None):
    """Run ``plan_name`` to completion, checking it has one two-step deploy phase."""
    plan.start_plan(PACKAGE_NAME, plan_name, params)

    started_plan = plan.get_plan(PACKAGE_NAME, plan_name)
    sdk_utils.out("sidecar plan: " + str(started_plan))
    phases = started_plan['phases']
    assert len(phases) == 1
    assert phases[0]['name'] == plan_name + '-deploy'
    assert len(phases[0]['steps']) == 2

    plan.wait_for_completed_plan(PACKAGE_NAME, plan_name)
def test_cassandra_migration():
    """Migrate data from a dcos-cassandra-service cluster to an SDK cluster.

    Writes data to the 'backup' cluster, backs it up to S3 via the old
    service's /v1/backup HTTP API, then restores the snapshot into the
    'restore' cluster via its 'restore-s3' plan; the DataContext cleanup jobs
    verify and remove the restored data.
    """
    # Cluster names come from the CI environment.
    backup_service_name = os.getenv('CASSANDRA_BACKUP_CLUSTER_NAME')
    restore_service_name = os.getenv('CASSANDRA_RESTORE_CLUSTER_NAME')

    # Point the data jobs at the backup cluster's node.
    env = EnvironmentContext(
        CASSANDRA_NODE_ADDRESS=os.getenv('BACKUP_NODE_ADDRESS',
                                         'node-0.cassandra.mesos'),
        CASSANDRA_NODE_PORT=os.getenv('BACKUP_NODE_PORT', '9042'))
    plan_parameters = {
        'S3_BUCKET_NAME': os.getenv('AWS_BUCKET_NAME', 'infinity-framework-test'),
        'AWS_ACCESS_KEY_ID': os.getenv('AWS_ACCESS_KEY_ID'),
        'AWS_SECRET_ACCESS_KEY': os.getenv('AWS_SECRET_ACCESS_KEY'),
        'AWS_REGION': os.getenv('AWS_REGION', 'us-west-2'),
        # uuid1 keeps the snapshot name unique per run
        'SNAPSHOT_NAME': str(uuid.uuid1()),
        'CASSANDRA_KEYSPACES': '"testspace1 testspace2"',
    }

    data_context = DataContext(
        init_jobs=[WRITE_DATA_JOB, VERIFY_DATA_JOB],
        cleanup_jobs=[DELETE_DATA_JOB, VERIFY_DELETION_JOB])
    # Install and run the write/delete data jobs against backup cluster,
    # running dcos-cassandra-service
    with env, JobContext(TEST_JOBS), data_context:
        # Back this cluster up to S3 via the old service's HTTP API.
        backup_parameters = {
            'backup_name': plan_parameters['SNAPSHOT_NAME'],
            's3_access_key': plan_parameters['AWS_ACCESS_KEY_ID'],
            's3_secret_key': plan_parameters['AWS_SECRET_ACCESS_KEY'],
            'external_location': 's3://{}'.format(plan_parameters['S3_BUCKET_NAME']),
        }
        # NOTE(review): assumes dcos_service_url() returns a URL ending in
        # '/' -- confirm, otherwise '{}v1/...' produces a malformed path.
        dcos.http.put('{}v1/backup/start'.format(
            shakedown.dcos_service_url(backup_service_name)),
            json=backup_parameters)
        spin.time_wait_noisy(lambda: get_dcos_cassandra_plan(
            backup_service_name).json()['status'] == 'COMPLETE')

    # Re-point the data jobs at the restore (SDK) cluster. Only cleanup jobs
    # here: the data arrives via the restore plan, and VERIFY_DATA_JOB as a
    # cleanup job checks it after the restore completes.
    env = EnvironmentContext(
        CASSANDRA_NODE_ADDRESS=os.getenv('RESTORE_NODE_ADDRESS',
                                         'node-0-server.sdk-cassandra.mesos'),
        CASSANDRA_NODE_PORT=os.getenv('RESTORE_NODE_PORT', '9052'))
    data_context = DataContext(
        cleanup_jobs=[VERIFY_DATA_JOB, DELETE_DATA_JOB, VERIFY_DELETION_JOB])
    with env, JobContext(TEST_JOBS), data_context:
        plan.start_plan(
            restore_service_name, 'restore-s3', parameters=plan_parameters)
        spin.time_wait_noisy(
            lambda: (plan.get_plan(restore_service_name, 'restore-s3').json()[
                'status'] == 'COMPLETE'))
def test_functionality():
    """Run the 'cleanup' and 'repair' plans against a populated keyspace."""
    parameters = {'CASSANDRA_KEYSPACE': 'testspace1'}

    # populate 'testspace1' for the test, then delete it afterwards:
    with jobs.RunJobContext(
            before_jobs=[WRITE_DATA_JOB, VERIFY_DATA_JOB],
            after_jobs=[DELETE_DATA_JOB, VERIFY_DELETION_JOB]):
        for maintenance_plan in ('cleanup', 'repair'):
            plan.start_plan(PACKAGE_NAME, maintenance_plan, parameters=parameters)
            plan.wait_for_completed_plan(PACKAGE_NAME, maintenance_plan)
def test_repair_cleanup_plans_complete():
    """Run 'cleanup' then 'repair' on the foldered service's test keyspace."""
    parameters = {'CASSANDRA_KEYSPACE': 'testspace1'}

    # populate 'testspace1' for the test, then clean up afterwards:
    with sdk_jobs.RunJobContext(
            before_jobs=[WRITE_DATA_JOB, VERIFY_DATA_JOB],
            after_jobs=[DELETE_DATA_JOB, VERIFY_DELETION_JOB]):
        for maintenance_plan in ('cleanup', 'repair'):
            sdk_plan.start_plan(FOLDERED_SERVICE_NAME, maintenance_plan,
                                parameters=parameters)
            sdk_plan.wait_for_completed_plan(FOLDERED_SERVICE_NAME,
                                             maintenance_plan)
def test_tls_connection(
    cassandra_service: Dict[str, Any],
    dcos_ca_bundle: str,
) -> None:
    """
    Tests writing, reading and deleting data over a secure TLS connection.

    Also round-trips the data through the 'backup-s3'/'restore-s3' plans, so
    AWS credentials are required (the test fails fast without them).
    """
    # Install TLS-enabled data jobs for the duration of the write/backup phase.
    with sdk_jobs.InstallJobContext(
        [
            config.get_write_data_job(dcos_ca_bundle=dcos_ca_bundle),
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle),
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle),
        ]
    ):
        sdk_jobs.run_job(config.get_write_data_job(dcos_ca_bundle=dcos_ca_bundle))
        sdk_jobs.run_job(config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle))

        key_id = os.getenv("AWS_ACCESS_KEY_ID")
        if not key_id:
            assert (
                False
            ), "AWS credentials are required for this test. " 'Disable test with e.g. TEST_TYPES="sanity and not aws"'
        plan_parameters = {
            "AWS_ACCESS_KEY_ID": key_id,
            "AWS_SECRET_ACCESS_KEY": os.getenv("AWS_SECRET_ACCESS_KEY"),
            "AWS_REGION": os.getenv("AWS_REGION", "us-west-2"),
            "S3_BUCKET_NAME": os.getenv("AWS_BUCKET_NAME", "infinity-framework-test"),
            # uuid1 keeps the snapshot name unique per run
            "SNAPSHOT_NAME": str(uuid.uuid1()),
            "CASSANDRA_KEYSPACES": '"testspace1 testspace2"',
        }

        # Run backup plan, uploading snapshots and schema to the cloud
        sdk_plan.start_plan(config.SERVICE_NAME, "backup-s3", parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, "backup-s3")

        # Wipe the data before restoring it from the backup.
        sdk_jobs.run_job(config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle))

        # Run restore plan, downloading snapshots and schema from the cloud
        sdk_plan.start_plan(config.SERVICE_NAME, "restore-s3", parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, "restore-s3")

    # Verify the restored data over TLS, then clean up.
    with sdk_jobs.InstallJobContext(
        [
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle),
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle),
        ]
    ):
        sdk_jobs.run_job(config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle))
        sdk_jobs.run_job(config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle))
def test_functionality() -> None:
    """Exercise the 'cleanup' and 'repair' plans on a populated keyspace."""
    parameters = {"CASSANDRA_KEYSPACE": "testspace1"}

    # populate 'testspace1' for the test, then delete it afterwards:
    setup_jobs = [config.get_write_data_job(), config.get_verify_data_job()]
    teardown_jobs = [config.get_delete_data_job(), config.get_verify_deletion_job()]
    with sdk_jobs.RunJobContext(before_jobs=setup_jobs, after_jobs=teardown_jobs):
        for plan_name in ("cleanup", "repair"):
            sdk_plan.start_plan(config.SERVICE_NAME, plan_name, parameters=parameters)
            sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, plan_name)
def run_backup_and_restore_with_auth(
    service_name: str,
    backup_plan: str,
    restore_plan: str,
    plan_parameters: Dict[str, Optional[str]],
    job_node_address: str = DEFAULT_NODE_ADDRESS,
) -> None:
    """Run a full backup/restore cycle using authenticated metronome jobs.

    Writes data, backs it up via ``backup_plan``, wipes it, restores it via
    ``restore_plan``, verifies it, then leaves an empty cluster behind.
    """
    write_job = get_write_data_job(node_address=job_node_address, auth=True)
    verify_job = get_verify_data_job(node_address=job_node_address, auth=True)
    delete_job = get_delete_data_job(node_address=job_node_address, auth=True)
    verify_gone_job = get_verify_deletion_job(
        node_address=job_node_address, auth=True)

    # Best-effort wipe of any stale keyspaces. The jobs use restart=NEVER, so
    # a failed run (nothing to delete) makes run_job() raise; just log that.
    try:
        sdk_jobs.run_job(delete_job)
    except Exception:
        log.info("Error during delete (normal if no stale data)")
        log.info(traceback.format_exc())

    # Write the test data and confirm it landed.
    sdk_jobs.run_job(write_job)
    sdk_jobs.run_job(verify_job)

    # Upload snapshots and schema to the cloud.
    sdk_plan.start_plan(service_name, backup_plan, parameters=plan_parameters)
    sdk_plan.wait_for_completed_plan(service_name, backup_plan)

    # Drop every keyspace/table and make sure they are really gone.
    sdk_jobs.run_job(delete_job)
    sdk_jobs.run_job(verify_gone_job)

    # Pull snapshots and schema back down from S3.
    sdk_plan.start_plan(service_name, restore_plan, parameters=plan_parameters)
    sdk_plan.wait_for_completed_plan(service_name, restore_plan)

    # The restored data must match what was originally written.
    sdk_jobs.run_job(verify_job)

    # Leave a clean cluster behind for other backup tests.
    sdk_jobs.run_job(delete_job)
    sdk_jobs.run_job(verify_gone_job)
def run_backup_and_restore(
    service_name: str,
    backup_plan: str,
    restore_plan: str,
    plan_parameters: Dict[str, Optional[str]],
    job_node_address: str = DEFAULT_NODE_ADDRESS,
) -> None:
    """Write data, back it up, wipe it, restore it, and verify the round trip.

    Ends by deleting the restored data so the cluster is clean for any later
    backup tests.
    """
    job_write = get_write_data_job(node_address=job_node_address)
    job_verify = get_verify_data_job(node_address=job_node_address)
    job_delete = get_delete_data_job(node_address=job_node_address)
    job_verify_gone = get_verify_deletion_job(node_address=job_node_address)

    # Best-effort wipe of stale keyspaces. The jobs use restart=NEVER, so a
    # failed run (nothing to delete) makes run_job() raise; just log that.
    try:
        sdk_jobs.run_job(job_delete)
    except Exception:
        log.info("Error during delete (normal if no stale data)")
        log.info(traceback.format_exc())

    # Write the test data and confirm it landed. The write job fails if the
    # data already exists.
    sdk_jobs.run_job(job_write)
    sdk_jobs.run_job(job_verify)

    # Upload snapshots and schema to the cloud.
    sdk_plan.start_plan(service_name, backup_plan, parameters=plan_parameters)
    sdk_plan.wait_for_completed_plan(service_name, backup_plan)

    # Drop every keyspace/table, then confirm they are really gone.
    sdk_jobs.run_job(job_delete)
    sdk_jobs.run_job(job_verify_gone)

    # Pull snapshots and schema back down from S3.
    sdk_plan.start_plan(service_name, restore_plan, parameters=plan_parameters)
    sdk_plan.wait_for_completed_plan(service_name, restore_plan)

    # The restored data must match what was originally written.
    sdk_jobs.run_job(job_verify)

    # Leave an empty cluster for subsequent backup tests.
    sdk_jobs.run_job(job_delete)
    sdk_jobs.run_job(job_verify_gone)
def run_backup_and_restore(
        service_name,
        backup_plan,
        restore_plan,
        plan_parameters,
        job_node_address=DEFAULT_NODE_ADDRESS):
    """Run a complete backup/restore cycle against ``service_name``.

    Writes test data, runs ``backup_plan``, wipes the data, runs
    ``restore_plan``, verifies the data returned, then deletes it again so
    later backup tests start from an empty cluster.

    Args:
        service_name: name of the deployed service to run plans against.
        backup_plan: plan that uploads snapshots and schema to the cloud.
        restore_plan: plan that retrieves snapshots and schema from S3.
        plan_parameters: parameters passed to both plans (credentials etc.).
        job_node_address: Cassandra node the metronome data jobs connect to.
    """
    write_data_job = get_write_data_job(node_address=job_node_address)
    verify_data_job = get_verify_data_job(node_address=job_node_address)
    delete_data_job = get_delete_data_job(node_address=job_node_address)
    verify_deletion_job = get_verify_deletion_job(node_address=job_node_address)

    # Ensure the keyspaces we will use aren't present. A failed delete is
    # expected when there is no stale data, so only log it.
    # Fixes: bare `except:` (would also swallow KeyboardInterrupt/SystemExit)
    # narrowed to `except Exception:`, and the stray `jobs.run_job` alias
    # unified with the `sdk_jobs` module used everywhere else in this function.
    try:
        sdk_jobs.run_job(delete_data_job)
    except Exception:
        log.info("Error during delete (normal if no stale data).")
        log.info(traceback.format_exc())

    # Write data to Cassandra with a metronome job, then verify it was written
    # Note: Write job will fail if data already exists
    sdk_jobs.run_job(write_data_job)
    sdk_jobs.run_job(verify_data_job)

    # Run backup plan, uploading snapshots and schema to the cloud
    sdk_plan.start_plan(service_name, backup_plan, parameters=plan_parameters)
    sdk_plan.wait_for_completed_plan(service_name, backup_plan)

    # Delete all keyspaces and tables with a metronome job
    sdk_jobs.run_job(delete_data_job)

    # Verify that the keyspaces and tables were deleted
    sdk_jobs.run_job(verify_deletion_job)

    # Run restore plan, retrieving snapshots and schema from S3
    sdk_plan.start_plan(service_name, restore_plan, parameters=plan_parameters)
    sdk_plan.wait_for_completed_plan(service_name, restore_plan)

    # Verify that the data we wrote and then deleted has been restored
    sdk_jobs.run_job(verify_data_job)

    # Delete data in preparation for any other backup tests
    sdk_jobs.run_job(delete_data_job)
    sdk_jobs.run_job(verify_deletion_job)
def test_toxic_sidecar_doesnt_trigger_recovery():
    # The toxic sidecar plan never succeeds. After restarting the scheduler,
    # the recovery plan's status must be unchanged: a failed ONCE task should
    # never trigger recovery.
    initial_recovery_plan = sdk_plan.get_plan(config.SERVICE_NAME, 'recovery')
    assert initial_recovery_plan['status'] == "COMPLETE"
    log.info(initial_recovery_plan)

    sdk_plan.start_plan(config.SERVICE_NAME, 'sidecar-toxic')
    wait_for_toxic_sidecar()

    # Restart the scheduler and wait for it to come back up.
    sdk_marathon.restart_app(config.SERVICE_NAME)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    # The recovery plan status must not have changed.
    final_recovery_plan = sdk_plan.get_plan(config.SERVICE_NAME, 'recovery')
    assert initial_recovery_plan['status'] == final_recovery_plan['status']
def test_toxic_sidecar_doesnt_trigger_recovery():
    # 1. Run the toxic sidecar plan, which can never succeed.
    # 2. Restart the scheduler.
    # 3. Check the recovery plan is unchanged: a failed ONCE task must not
    #    trigger recovery.
    recovery_before = sdk_plan.get_plan(config.SERVICE_NAME, 'recovery')
    assert recovery_before['status'] == "COMPLETE"
    log.info(recovery_before)

    sdk_plan.start_plan(config.SERVICE_NAME, 'sidecar-toxic')
    wait_for_toxic_sidecar()

    # Bounce the scheduler and wait for it to settle.
    sdk_marathon.restart_app(config.SERVICE_NAME)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    recovery_after = sdk_plan.get_plan(config.SERVICE_NAME, 'recovery')
    assert recovery_before['status'] == recovery_after['status']
def test_toxic_sidecar_doesnt_trigger_recovery():
    # 1. Run the toxic sidecar plan that will never succeed.
    # 2. Restart the scheduler.
    # 3. Verify the recovery plan is still empty: a failed FINISHED task
    #    should never trigger recovery.
    recovery_plan = sdk_plan.get_plan(config.SERVICE_NAME, 'recovery')
    assert len(recovery_plan['phases']) == 0
    log.info(recovery_plan)

    sdk_plan.start_plan(config.SERVICE_NAME, 'sidecar-toxic')
    shakedown.wait_for(ToxicSidecarCheck(), timeout_seconds=10 * 60)

    # Restart the scheduler and wait for it to come back.
    sdk_marathon.restart_app(config.SERVICE_NAME)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    # The recovery plan must still be empty.
    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, 'recovery')
    recovery_plan = sdk_plan.get_plan(config.SERVICE_NAME, 'recovery')
    assert len(recovery_plan['phases']) == 0
def test_toxic_sidecar_doesnt_trigger_recovery():
    # The toxic sidecar plan never succeeds; a failed ONCE task must not
    # trigger recovery, so the recovery plan stays empty across a scheduler
    # restart.
    recovery = sdk_plan.get_plan(config.SERVICE_NAME, 'recovery')
    assert len(recovery['phases']) == 0
    log.info(recovery)

    sdk_plan.start_plan(config.SERVICE_NAME, 'sidecar-toxic')
    wait_for_toxic_sidecar()

    # Restart the scheduler, then wait for deployment to settle again.
    sdk_marathon.restart_app(config.SERVICE_NAME)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    # Verify that the recovery plan is still empty.
    sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, 'recovery')
    recovery = sdk_plan.get_plan(config.SERVICE_NAME, 'recovery')
    assert len(recovery['phases']) == 0
def test_functionality():
    """Run the 'cleanup' and 'repair' plans against a populated keyspace."""
    parameters = {'CASSANDRA_KEYSPACE': 'testspace1'}

    # populate 'testspace1' for the test, then remove it afterwards:
    with sdk_jobs.RunJobContext(
            before_jobs=[config.get_write_data_job(),
                         config.get_verify_data_job()],
            after_jobs=[config.get_delete_data_job(),
                        config.get_verify_deletion_job()]):
        for plan_name in ('cleanup', 'repair'):
            sdk_plan.start_plan(config.SERVICE_NAME, plan_name,
                                parameters=parameters)
            sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, plan_name)
def test_functionality():
    """Exercise the 'cleanup' and 'repair' plans on test data."""
    parameters = {'CASSANDRA_KEYSPACE': 'testspace1'}

    # populate 'testspace1' for the test, then remove it afterwards:
    setup_jobs = [config.get_write_data_job(), config.get_verify_data_job()]
    teardown_jobs = [config.get_delete_data_job(), config.get_verify_deletion_job()]
    with sdk_jobs.RunJobContext(before_jobs=setup_jobs, after_jobs=teardown_jobs):
        sdk_plan.start_plan(config.SERVICE_NAME, 'cleanup', parameters=parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, 'cleanup')

        sdk_plan.start_plan(config.SERVICE_NAME, 'repair', parameters=parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, 'repair')
def test_tls_connection(cassandra_service_tls, dcos_ca_bundle):
    """
    Tests writing, reading and deleting data over a secure TLS connection.

    Also round-trips the data through the 'backup-s3'/'restore-s3' plans, so
    AWS credentials are required (the test fails fast without them).
    """
    # Install TLS-enabled data jobs for the duration of the write/backup phase.
    with sdk_jobs.InstallJobContext([
            config.get_write_data_job(dcos_ca_bundle=dcos_ca_bundle),
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle),
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle)]):

        sdk_jobs.run_job(config.get_write_data_job(dcos_ca_bundle=dcos_ca_bundle))
        sdk_jobs.run_job(config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle))

        key_id = os.getenv('AWS_ACCESS_KEY_ID')
        if not key_id:
            assert False, 'AWS credentials are required for this test. ' \
                'Disable test with e.g. TEST_TYPES="sanity and not aws"'
        plan_parameters = {
            'AWS_ACCESS_KEY_ID': key_id,
            'AWS_SECRET_ACCESS_KEY': os.getenv('AWS_SECRET_ACCESS_KEY'),
            'AWS_REGION': os.getenv('AWS_REGION', 'us-west-2'),
            'S3_BUCKET_NAME': os.getenv('AWS_BUCKET_NAME', 'infinity-framework-test'),
            # uuid1 keeps the snapshot name unique per run
            'SNAPSHOT_NAME': str(uuid.uuid1()),
            'CASSANDRA_KEYSPACES': '"testspace1 testspace2"',
        }

        # Run backup plan, uploading snapshots and schema to the cloud
        sdk_plan.start_plan(config.SERVICE_NAME, 'backup-s3', parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, 'backup-s3')

        # Wipe the data before restoring it from the backup.
        sdk_jobs.run_job(config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle))

        # Run restore plan, downloading snapshots and schema from the cloud
        sdk_plan.start_plan(config.SERVICE_NAME, 'restore-s3', parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, 'restore-s3')

    # Verify the restored data over TLS, then clean up.
    with sdk_jobs.InstallJobContext([
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle),
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle)]):

        sdk_jobs.run_job(config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle))
        sdk_jobs.run_job(config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle))
def run_backup_and_restore(service_name, backup_plan, restore_plan,
                           plan_parameters,
                           job_node_address=DEFAULT_NODE_ADDRESS):
    """Run a complete backup/restore cycle against ``service_name``.

    Writes test data, runs ``backup_plan``, wipes the data, runs
    ``restore_plan``, verifies the data returned, then deletes it again so
    later backup tests start from an empty cluster.
    """
    write_data_job = get_write_data_job(node_address=job_node_address)
    verify_data_job = get_verify_data_job(node_address=job_node_address)
    delete_data_job = get_delete_data_job(node_address=job_node_address)
    verify_deletion_job = get_verify_deletion_job(
        node_address=job_node_address)

    # Write data to Cassandra with a metronome job, then verify it was written
    # Note: Write job will fail if data already exists
    jobs.run_job(write_data_job)
    jobs.run_job(verify_data_job)

    # Run backup plan, uploading snapshots and schema to the cloud
    plan.start_plan(service_name, backup_plan, parameters=plan_parameters)
    plan.wait_for_completed_plan(service_name, backup_plan)

    # Delete all keyspaces and tables with a metronome job
    jobs.run_job(delete_data_job)

    # Verify that the keyspaces and tables were deleted
    jobs.run_job(verify_deletion_job)

    # Run restore plan, retrieving snapshots and schema from S3
    plan.start_plan(service_name, restore_plan, parameters=plan_parameters)
    plan.wait_for_completed_plan(service_name, restore_plan)

    # Verify that the data we wrote and then deleted has been restored
    jobs.run_job(verify_data_job)

    # Delete data in preparation for any other backup tests
    jobs.run_job(delete_data_job)
    jobs.run_job(verify_deletion_job)
def run_backup_and_restore(
        service_name,
        backup_plan,
        restore_plan,
        plan_parameters,
        job_node_address=DEFAULT_NODE_ADDRESS):
    """Run a complete backup/restore cycle against ``service_name``.

    Writes test data, runs ``backup_plan``, wipes the data, runs
    ``restore_plan``, verifies the data returned, then deletes it again so
    later backup tests start from an empty cluster.
    """
    write_data_job = get_write_data_job(node_address=job_node_address)
    verify_data_job = get_verify_data_job(node_address=job_node_address)
    delete_data_job = get_delete_data_job(node_address=job_node_address)
    verify_deletion_job = get_verify_deletion_job(node_address=job_node_address)

    # Write data to Cassandra with a metronome job, then verify it was written
    # Note: Write job will fail if data already exists
    sdk_jobs.run_job(write_data_job)
    sdk_jobs.run_job(verify_data_job)

    # Run backup plan, uploading snapshots and schema to the cloud
    sdk_plan.start_plan(service_name, backup_plan, parameters=plan_parameters)
    sdk_plan.wait_for_completed_plan(service_name, backup_plan)

    # Delete all keyspaces and tables with a metronome job
    sdk_jobs.run_job(delete_data_job)

    # Verify that the keyspaces and tables were deleted
    sdk_jobs.run_job(verify_deletion_job)

    # Run restore plan, retrieving snapshots and schema from S3
    sdk_plan.start_plan(service_name, restore_plan, parameters=plan_parameters)
    sdk_plan.wait_for_completed_plan(service_name, restore_plan)

    # Verify that the data we wrote and then deleted has been restored
    sdk_jobs.run_job(verify_data_job)

    # Delete data in preparation for any other backup tests
    sdk_jobs.run_job(delete_data_job)
    sdk_jobs.run_job(verify_deletion_job)
def test_functionality() -> None:
    """Run 'cleanup' then 'repair' against a populated test keyspace."""
    parameters = {"CASSANDRA_KEYSPACE": "testspace1"}

    # populate 'testspace1' for the test, then remove it afterwards:
    with sdk_jobs.RunJobContext(
        before_jobs=[config.get_write_data_job(), config.get_verify_data_job()],
        after_jobs=[config.get_delete_data_job(), config.get_verify_deletion_job()],
    ):
        for plan_name in ("cleanup", "repair"):
            sdk_plan.start_plan(config.SERVICE_NAME, plan_name, parameters=parameters)
            sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, plan_name)
def test_cassandra_migration():
    """Back up one Cassandra cluster to S3 and restore it onto a second one."""
    src_service = os.getenv('CASSANDRA_BACKUP_CLUSTER_NAME')
    dst_service = os.getenv('CASSANDRA_RESTORE_CLUSTER_NAME')

    # Metronome jobs that talk to the source (backup) cluster.
    src_address = os.getenv('BACKUP_NODE_ADDRESS', config.DEFAULT_NODE_ADDRESS)
    src_port = os.getenv('BACKUP_NODE_PORT', config.DEFAULT_NODE_PORT)
    src_write = config.get_write_data_job(src_address, src_port)
    src_verify = config.get_verify_data_job(src_address, src_port)
    src_delete = config.get_delete_data_job(src_address, src_port)
    src_verify_gone = config.get_verify_deletion_job(src_address, src_port)

    plan_parameters = {
        'S3_BUCKET_NAME': os.getenv('AWS_BUCKET_NAME', 'infinity-framework-test'),
        'AWS_ACCESS_KEY_ID': os.getenv('AWS_ACCESS_KEY_ID'),
        'AWS_SECRET_ACCESS_KEY': os.getenv('AWS_SECRET_ACCESS_KEY'),
        'AWS_REGION': os.getenv('AWS_REGION', 'us-west-2'),
        'SNAPSHOT_NAME': str(uuid.uuid1()),  # unique per test run
        'CASSANDRA_KEYSPACES': '"testspace1 testspace2"',
    }

    # Install the jobs on the source cluster; write/verify data before the
    # backup, delete and verify deletion afterwards.
    with sdk_jobs.InstallJobContext(
            [src_write, src_verify, src_delete, src_verify_gone]), \
            sdk_jobs.RunJobContext(
                before_jobs=[src_write, src_verify],
                after_jobs=[src_delete, src_verify_gone]):
        # Kick off the backup to S3 and wait for the service to settle.
        backup_parameters = {
            'backup_name': plan_parameters['SNAPSHOT_NAME'],
            's3_access_key': plan_parameters['AWS_ACCESS_KEY_ID'],
            's3_secret_key': plan_parameters['AWS_SECRET_ACCESS_KEY'],
            'external_location': 's3://{}'.format(plan_parameters['S3_BUCKET_NAME']),
        }
        sdk_cmd.service_request(
            'PUT', src_service, '/v1/backup/start', json=backup_parameters)
        sdk_plan.wait_for_completed_deployment(src_service)

    # Restore the snapshot onto the second (destination) cluster.
    dst_address = os.getenv(
        'RESTORE_NODE_ADDRESS',
        sdk_hosts.autoip_host('sdk-cassandra', 'node-0-server'))
    dst_port = os.getenv('RESTORE_NODE_PORT', '9052')
    dst_write = config.get_write_data_job(dst_address, dst_port)
    dst_verify = config.get_verify_data_job(dst_address, dst_port)
    dst_delete = config.get_delete_data_job(dst_address, dst_port)
    dst_verify_gone = config.get_verify_deletion_job(dst_address, dst_port)

    # After the restore plan completes, verify the data arrived and clean up.
    # (The write job is installed for parity but intentionally never run.)
    with sdk_jobs.InstallJobContext(
            [dst_write, dst_verify, dst_delete, dst_verify_gone]), \
            sdk_jobs.RunJobContext(
                after_jobs=[dst_verify, dst_delete, dst_verify_gone]):
        sdk_plan.start_plan(dst_service, 'restore-s3', parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(dst_service, 'restore-s3')
def test_tls_connection(
    cassandra_service: Dict[str, Any],
    dcos_ca_bundle: str,
) -> None:
    """
    Tests writing, reading and deleting data over a secure TLS connection.

    Also exercises the backup-to-S3 and restore-from-S3 plans while TLS is
    enabled, so AWS credentials must be present in the environment.
    """
    # Install the data jobs (TLS-enabled via the supplied CA bundle), then
    # write a dataset and confirm it reads back over the secure connection.
    with sdk_jobs.InstallJobContext([
        config.get_write_data_job(dcos_ca_bundle=dcos_ca_bundle),
        config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle),
        config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle),
    ]):
        sdk_jobs.run_job(
            config.get_write_data_job(dcos_ca_bundle=dcos_ca_bundle))
        sdk_jobs.run_job(
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle))

        key_id = os.getenv("AWS_ACCESS_KEY_ID")
        if not key_id:
            # Fail loudly rather than skipping: missing credentials are a
            # test-environment misconfiguration.
            assert (
                False
            ), "AWS credentials are required for this test. " 'Disable test with e.g. TEST_TYPES="sanity and not aws"'

        plan_parameters = {
            "AWS_ACCESS_KEY_ID": key_id,
            "AWS_SECRET_ACCESS_KEY": os.getenv("AWS_SECRET_ACCESS_KEY"),
            "AWS_REGION": os.getenv("AWS_REGION", "us-west-2"),
            "S3_BUCKET_NAME": os.getenv("AWS_BUCKET_NAME", "infinity-framework-test"),
            "SNAPSHOT_NAME": str(uuid.uuid1()),  # unique name per test run
            "CASSANDRA_KEYSPACES": '"testspace1 testspace2"',
        }

        # Run backup plan, uploading snapshots and schema to S3.
        sdk_plan.start_plan(config.SERVICE_NAME, "backup-s3", parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, "backup-s3")

        # Wipe the data so the restore below has something to prove.
        sdk_jobs.run_job(
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle))

        # Run restore plan, downloading snapshots and schema from S3.
        sdk_plan.start_plan(config.SERVICE_NAME, "restore-s3", parameters=plan_parameters)
        sdk_plan.wait_for_completed_plan(config.SERVICE_NAME, "restore-s3")

    # Re-install the verify/delete jobs to check the restored data over TLS,
    # then delete it so later tests start from an empty keyspace.
    with sdk_jobs.InstallJobContext([
        config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle),
        config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle),
    ]):
        sdk_jobs.run_job(
            config.get_verify_data_job(dcos_ca_bundle=dcos_ca_bundle))
        sdk_jobs.run_job(
            config.get_delete_data_job(dcos_ca_bundle=dcos_ca_bundle))
def test_cassandra_migration():
    """Migrate data between two Cassandra clusters via an S3 backup.

    Backs up the cluster named by CASSANDRA_BACKUP_CLUSTER_NAME and restores
    the snapshot onto the cluster named by CASSANDRA_RESTORE_CLUSTER_NAME.
    Uses the legacy dcos/shakedown HTTP helpers rather than the sdk_* APIs.
    """
    backup_service_name = os.getenv('CASSANDRA_BACKUP_CLUSTER_NAME')
    restore_service_name = os.getenv('CASSANDRA_RESTORE_CLUSTER_NAME')

    # Metronome jobs targeting the source (backup) cluster.
    backup_node_address = os.getenv('BACKUP_NODE_ADDRESS', DEFAULT_NODE_ADDRESS)
    backup_node_port = os.getenv('BACKUP_NODE_PORT', DEFAULT_NODE_PORT)
    backup_write_data_job = get_write_data_job(backup_node_address, backup_node_port)
    backup_verify_data_job = get_verify_data_job(backup_node_address, backup_node_port)
    backup_delete_data_job = get_delete_data_job(backup_node_address, backup_node_port)
    backup_verify_deletion_job = get_verify_deletion_job(
        backup_node_address, backup_node_port)
    plan_parameters = {
        'S3_BUCKET_NAME': os.getenv('AWS_BUCKET_NAME', 'infinity-framework-test'),
        'AWS_ACCESS_KEY_ID': os.getenv('AWS_ACCESS_KEY_ID'),
        'AWS_SECRET_ACCESS_KEY': os.getenv('AWS_SECRET_ACCESS_KEY'),
        'AWS_REGION': os.getenv('AWS_REGION', 'us-west-2'),
        'SNAPSHOT_NAME': str(uuid.uuid1()),  # unique per test run
        'CASSANDRA_KEYSPACES': '"testspace1 testspace2"',
    }

    backup_install_job_context = jobs.InstallJobContext([
        backup_write_data_job, backup_verify_data_job,
        backup_delete_data_job, backup_verify_deletion_job
    ])
    # Write/verify data before backing up; delete and verify deletion after.
    backup_run_job_context = jobs.RunJobContext(
        before_jobs=[backup_write_data_job, backup_verify_data_job],
        after_jobs=[backup_delete_data_job, backup_verify_deletion_job])
    # Install and run the write/delete data jobs against backup cluster,
    # running dcos-cassandra-service
    with backup_install_job_context, backup_run_job_context:
        # Back this cluster up to S3
        backup_parameters = {
            'backup_name': plan_parameters['SNAPSHOT_NAME'],
            's3_access_key': plan_parameters['AWS_ACCESS_KEY_ID'],
            's3_secret_key': plan_parameters['AWS_SECRET_ACCESS_KEY'],
            'external_location': 's3://{}'.format(plan_parameters['S3_BUCKET_NAME']),
        }
        # NOTE(review): '{}v1/...' assumes dcos_service_url() returns a URL
        # with a trailing slash — confirm against the shakedown helper.
        dcos.http.put(
            '{}v1/backup/start'.format(
                shakedown.dcos_service_url(backup_service_name)),
            json=backup_parameters)
        plan.wait_for_completed_deployment(backup_service_name)

    # Restore data to second instance:
    restore_node_address = os.getenv(
        'RESTORE_NODE_ADDRESS',
        hosts.autoip_host('sdk-cassandra', 'node-0-server'))
    restore_node_port = os.getenv('RESTORE_NODE_PORT', '9052')
    restore_write_data_job = get_write_data_job(restore_node_address, restore_node_port)
    restore_verify_data_job = get_verify_data_job(restore_node_address, restore_node_port)
    restore_delete_data_job = get_delete_data_job(restore_node_address, restore_node_port)
    restore_verify_deletion_job = get_verify_deletion_job(
        restore_node_address, restore_node_port)

    restore_install_job_context = jobs.InstallJobContext([
        restore_write_data_job, restore_verify_data_job,
        restore_delete_data_job, restore_verify_deletion_job
    ])
    # After the restore plan completes: verify the data arrived, then clean
    # up. (The write job is installed but intentionally never run here.)
    restore_run_job_context = jobs.RunJobContext(after_jobs=[
        restore_verify_data_job, restore_delete_data_job,
        restore_verify_deletion_job
    ])
    with restore_install_job_context, restore_run_job_context:
        plan.start_plan(restore_service_name, 'restore-s3',
                        parameters=plan_parameters)
        plan.wait_for_completed_plan(restore_service_name, 'restore-s3')