def stream_artifact_in_chunks(self, job_id, kind, name, path, binary=False):
    """Stream job artifact to server in chunks"""
    file_size = os.path.getsize(path)
    object_id = generate_object_id(job_id, kind, name)
    object_sha = sha256_of_file(path)
    num_chunks = len(list(self._get_chunks(file_size)))

    query = Command.new(self.__ws_client.socket(),
                        action='chunk-stream-query',
                        object_id=object_id)
    query_result = self.__ws_client.send(query, assertions={'message': 'ok'})

    existing_chunk_shas = {}
    if 'stored_chunk_shas' in query_result and query_result['stored_chunk_shas'] != '':
        existing_chunk_shas = dict(item.split(":")
                                   for item in query_result['stored_chunk_shas'].split(","))
        log.debug("found existing stored chunks: {}".format(existing_chunk_shas))

    matching_uploaded_chunks = 0
    try:
        for chunk_start, chunk_size, chunk_id in self._get_chunks(file_size):
            # skip if server already has chunk stored
            if str(chunk_id) in existing_chunk_shas:
                log.info("chunk {} already exists on server skipping upload".format(chunk_id))
                continue

            log.info("sending artifact[{}][{}] chunk: {}".format(name, object_id, chunk_id))
            command = Command.new(self.__ws_client.socket(),
                                  action='chunk-stream',
                                  test_id=job_id,
                                  file_size=file_size,
                                  num_of_chunks=num_chunks,
                                  chunk_id=chunk_id,
                                  object_id=object_id,
                                  object_sha=object_sha,
                                  chunk_size=chunk_size,
                                  kind=kind,
                                  name=name,
                                  eof=EOF_MARKER,
                                  keepalive=KEEPALIVE_MARKER,
                                  binary=binary)
            response = self.__ws_client.send(command, assertions={'message': 'ready'})

            chunk_sha = hashlib.sha256()
            with open(path) as fh:
                fh.seek(chunk_start, os.SEEK_SET)
                while fh.tell() < (chunk_start + chunk_size):
                    byte_size = (512 if fh.tell() + 512 < (chunk_start + chunk_size)
                                 else (chunk_start + chunk_size) - fh.tell())
                    data = fh.read(byte_size)
                    chunk_sha.update(data)
                    data = base64.b64encode(data)
                    self.__ws_client.send(data)

            self.__ws_client.send(base64.b64encode(EOF_MARKER))
            response = self.__ws_client.receive(response,
                                                assertions={'message': 'chunk_received', 'done': True})

            if 'chunk_sha' in response and chunk_sha.hexdigest() == response['chunk_sha']:
                matching_uploaded_chunks += 1
            else:
                log.error('chunk upload failed: response[{}], objectid: [{}], chunkid: [{}], totalchunks: [{}], name: [{}]'
                          .format(response, object_id, chunk_id, num_chunks, name))
    finally:
        uploaded_successfully = matching_uploaded_chunks == num_chunks
        log.info("UPLOADED SUCCESS: {}".format(uploaded_successfully))
        self.__ws_client.send(Command.new(self.__ws_client.socket(),
                                          action='chunk-stream-complete',
                                          successful=uploaded_successfully,
                                          test_id=job_id,
                                          object_id=object_id,
                                          kind=kind,
                                          name=name),
                              assertions={'message': 'ok'})
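# The chunked upload above leans on three helpers that do not appear in this
# section: self._get_chunks(), sha256_of_file() and generate_object_id(). The
# sketch below is a minimal, hypothetical reconstruction from their call sites,
# written as module-level functions with an assumed fixed CHUNK_SIZE; the real
# implementations (and the real chunk size) are not shown here.
import hashlib
import os
import uuid

CHUNK_SIZE = 10 * 1024 * 1024  # assumption for illustration only


def _get_chunks(file_size, chunk_size=CHUNK_SIZE):
    """Yield (chunk_start, chunk_size, chunk_id) tuples covering file_size bytes."""
    for chunk_id, chunk_start in enumerate(range(0, file_size, chunk_size)):
        yield chunk_start, min(chunk_size, file_size - chunk_start), chunk_id


def sha256_of_file(path, block_size=65536):
    """SHA-256 hex digest of a whole file, read in blocks to bound memory use."""
    digest = hashlib.sha256()
    with open(path, 'rb') as fh:
        for block in iter(lambda: fh.read(block_size), b''):
            digest.update(block)
    return digest.hexdigest()


def generate_object_id(job_id, kind, name):
    """Deterministic artifact id derived from the fields that identify it."""
    return str(uuid.uuid5(uuid.NAMESPACE_URL, '{}/{}/{}'.format(job_id, kind, name)))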
def __authenticate(self, command):
    """Sign the token the server asked us to sign. Send it back.
    Give the server a token of our own to sign. Verify it."""
    assert command.get('action') == 'authenticate'
    data = {'signature': self.__client_key.sign_message(command['token']),
            'cluster': self.__cluster_name}
    response = command.respond(**data)
    if response.get('authenticated') != True:
        raise UnauthenticatedError("Our peer could not validate our signed auth token")
    # cool, the server authenticated us, now we need to
    # authenticate the server:
    token = random_token()
    cmd = Command.new(self.ws, action='authenticate', token=token)
    response = cmd.send()
    signature = response['signature']
    # Verify the signature, raises BadSignatureError if it fails:
    try:
        self.__server_key.verify_message(token, signature)
    except:
        response.respond(message='Bad Signature of token for authentication', done=True)
        log.error('server provided bad signature for auth token')
        raise
    response.respond(authenticated=True, done=True)
def authenticate():
    token_to_sign = random_token()
    cmd = Command.new(ws, action='authenticate', token=token_to_sign)
    response = cmd.send()
    context['cluster'] = cluster = response['cluster']
    client_pubkey = db.get_pub_key(cluster)
    client_apikey = APIKey(client_pubkey['pubkey'])
    # Verify the client correctly signed the token:
    try:
        client_apikey.verify_message(token_to_sign, response.get('signature'))
    except:
        response.respond(message='Bad Signature of token for authentication', done=True)
        log.error('client provided bad signature for auth token')
        raise
    response.respond(authenticated=True, done=True)
    # Client will ask us to authenticate too:
    command = receive_data(ws)
    assert command.get('action') == 'authenticate'
    data = {'signature': context['apikey'].sign_message(command['token'])}
    response = command.respond(**data)
    if response.get('authenticated') != True:
        raise UnauthenticatedError("Our peer could not validate our signed auth token")
def __authenticate(self, command):
    """Sign the token the server asked us to sign. Send it back.
    Give the server a token of our own to sign. Verify it."""
    assert command.get('action') == 'authenticate'
    data = {'signature': self.__client_key.sign_message(command['token']),
            'cluster': self.__cluster_name}
    response = command.respond(**data)
    if not response.get('authenticated'):
        raise UnauthenticatedError("Our peer could not validate our signed auth token")
    # cool, the server authenticated us, now we need to
    # authenticate the server:
    token = random_token()
    cmd = Command.new(self.socket(), action='authenticate', token=token)
    response = cmd.send()
    signature = response['signature']
    # Verify the signature, raises BadSignatureError if it fails:
    try:
        self.__server_key.verify_message(token, signature)
    except:
        response.respond(message='Bad Signature of token for authentication', done=True)
        log.error('server provided bad signature for auth token')
        raise
    response.respond(authenticated=True, done=True)
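# Both sides of the handshake above challenge their peer with random_token(),
# which is not defined in this section. A minimal sketch, assuming a URL-safe
# random nonce is all that is needed (the project's real helper may differ):
import base64
import os


def random_token(num_bytes=32):
    # Hypothetical: num_bytes from the OS CSPRNG, base64-encoded so the token
    # travels cleanly inside a JSON command.
    return base64.urlsafe_b64encode(os.urandom(num_bytes)).rstrip(b'=')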
def stream_artifact(self, job_id, kind, name, path, binary=False):
    """Stream job artifact to server"""
    # Inform the server we will be streaming an artifact:
    command = Command.new(self.__ws_client.socket(),
                          action='stream',
                          test_id=job_id,
                          kind=kind,
                          name=name,
                          eof=EOF_MARKER,
                          keepalive=KEEPALIVE_MARKER,
                          binary=binary)
    response = self.__ws_client.send(command, assertions={'message': 'ready'})
    fsize = format_bytesize(os.stat(path).st_size)
    with open(path) as f:
        log.info('Streaming {name} - {path} ({fsize})'.format(name=name, path=path, fsize=fsize))
        while True:
            data = f.read(512)
            if data == '':
                break
            data = base64.b64encode(data)
            self.__ws_client.send(data)
    self.__ws_client.send(base64.b64encode(EOF_MARKER))
    self.__ws_client.receive(response, assertions={'message': 'stream_received', 'done': True})
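# stream_artifact() logs a human-readable size via format_bytesize(), which is
# not defined in this section. A rough sketch of such a formatter (assumed
# behaviour, not necessarily the project's actual helper):
def format_bytesize(num_bytes):
    """Render a byte count using binary units, e.g. 1536 -> '1.5 KB'."""
    size = float(num_bytes)
    for unit in ('B', 'KB', 'MB', 'GB'):
        if size < 1024.0:
            return '{:.1f} {}'.format(size, unit)
        size /= 1024.0
    return '{:.1f} TB'.format(size)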
def __job_done(self, job_id, status='completed', message=None, stacktrace=None):
    """Tell the server we're done with a job, and give it the test artifacts"""
    ##{type:'command', command_id:'llll', action:'test_done', test_id:'xxxxxxx'}
    command = Command.new(self.ws, action="test_done", test_id=job_id, status=status)
    if message is not None:
        command['message'] = message
    if stacktrace is not None:
        command['stacktrace'] = stacktrace
    log.debug("Sending job completion message for {test_id} ...".format(test_id=job_id))
    response = command.send()
    ##{type:'response', command_id:'llll', test_id:'xxxxxx', message='test_update', done:true}
    assert response['test_id'] == job_id
    assert response['message'] == 'test_update'
    assert response['done'] == True
    log.debug("Server confirms job {test_id} is complete.".format(test_id=job_id))
def __job_done(self, job_id, status='completed', message=None, stacktrace=None):
    """Tell the server we're done with a job, and give it the test artifacts"""
    ##{type:'command', command_id:'llll', action:'test_done', test_id:'xxxxxxx'}
    command = Command.new(self.__ws_client.socket(), action="test_done", test_id=job_id, status=status)
    if message is not None:
        command['message'] = message
    if stacktrace is not None:
        command['stacktrace'] = stacktrace
    log.debug("Sending job completion message for {test_id} ...".format(test_id=job_id))
    response = command.send()
    ##{type:'response', command_id:'llll', test_id:'xxxxxx', message='test_update', done:true}
    assert response['test_id'] == job_id
    assert response['message'] == 'test_update'
    assert response['done'] == True
    log.debug("Server confirms job {test_id} is complete.".format(test_id=job_id))
def __get_work(self):
    """Ask the server for work"""
    command = Command.new(self.ws, action='get_work')
    response = command.send()
    while True:
        # We either got a job, or we received a wait request:
        if response.get('action') == 'wait':
            response = response.receive()
            continue
        elif response.has_key('test'):
            break
        else:
            raise AssertionError(
                'Response was neither a wait action, nor contained '
                'any test for us to run: {response}'.format(response=response))
    job = response['test']
    test_id = job['test_id']
    response = response.respond(test_id=test_id, status='prepared')
    assert response['status'] == 'in_progress'
    return job
def __get_work(self):
    """Ask the server for work"""
    command = Command.new(self.__ws_client.socket(), action='get_work')
    response = command.send()
    while True:
        # We either got a job, or we received a wait request:
        if response.get('action') == 'wait':
            response = response.receive()
            continue
        elif response.has_key('test'):
            break
        else:
            raise AssertionError(
                'Response was neither a wait action, nor contained '
                'any test for us to run: {response}'.format(response=response))
    job = response['test']
    test_id = job['test_id']
    response = response.respond(test_id=test_id, status='prepared')
    assert response['status'] == 'in_progress'
    return job
def __good_bye(self):
    """Tell the server we're disconnecting"""
    command = Command.new(self.socket(), action="good_bye")
    log.debug("Sending goodbye message to server..")
    command.send(await_response=False)
def perform_job(self, job):
    """Perform a job the server gave us, stream output and artifacts to the given websocket."""
    job = copy.deepcopy(job['test_definition'])
    # Cleanup the job structure according to what stress_compare needs:
    for operation in job['operations']:
        operation['type'] = operation['operation']
        del operation['operation']

    job_dir = os.path.join(os.path.expanduser('~'), '.cstar_perf', 'jobs', job['test_id'])
    mkpath(job_dir)
    stats_path = os.path.join(job_dir, 'stats.{test_id}.json'.format(test_id=job['test_id']))
    summary_path = os.path.join(job_dir, 'stats_summary.{test_id}.json'.format(test_id=job['test_id']))
    stress_log_path = os.path.join(job_dir, 'stress_compare.{test_id}.log'.format(test_id=job['test_id']))
    stress_json = json.dumps(dict(revisions=job['revisions'],
                                  operations=job['operations'],
                                  title=job['title'],
                                  leave_data=job.get('leave_data', False),
                                  log=stats_path))
    # Create a temporary location to store the stress_compare json file:
    stress_json_path = os.path.join(job_dir, 'test.{test_id}.json'.format(test_id=job['test_id']))
    with open(stress_json_path, 'w') as f:
        f.write(stress_json)

    # Inform the server we will be streaming the console output to them:
    command = Command.new(self.__ws_client.socket(),
                          action='stream',
                          test_id=job['test_id'],
                          kind='console',
                          name="stress_compare.{test_id}.log".format(test_id=job['test_id']),
                          eof=EOF_MARKER,
                          keepalive=KEEPALIVE_MARKER)
    response = self.__ws_client.send(command, assertions={'message': 'ready'})

    # Start a status checking thread.
    # If a user cancels the job after it's marked in_progress, we
    # need to periodically check for that state change and kill
    # our test:
    cancel_checker = JobCancellationTracker(urlparse.urlparse(self.ws_endpoint).netloc, job['test_id'])
    cancel_checker.start()

    # stats file observer:
    # looks for changes to update server with status progress message
    observer = Observer()
    observer.schedule(UpdateServerProgressMessageHandler(job, urlparse.urlparse(self.ws_endpoint).netloc),
                      os.path.join(os.path.expanduser("~"), '.cstar_perf', 'jobs'),
                      recursive=True)
    observer.start()

    # Run stress_compare in a separate process, collecting the
    # output as an artifact:
    try:
        # Run stress_compare with pexpect. subprocess.Popen didn't
        # work due to some kind of tty issue when invoking
        # nodetool.
        stress_proc = pexpect.spawn('cstar_perf_stress {stress_json_path}'.format(stress_json_path=stress_json_path),
                                    timeout=None)
        with open(stress_log_path, 'w') as stress_log:
            while True:
                try:
                    with timeout(25):
                        line = stress_proc.readline()
                        if line == '':
                            break
                        stress_log.write(line)
                        sys.stdout.write(line)
                        self.__ws_client.send(base64.b64encode(line))
                except TimeoutError:
                    self.__ws_client.send(base64.b64encode(KEEPALIVE_MARKER))
    finally:
        cancel_checker.stop()
        observer.stop()
        self.__ws_client.send(base64.b64encode(EOF_MARKER))
        response = self.__ws_client.receive(response, assertions={'message': 'stream_received', 'done': True})

    # Find the log tarball for each revision by introspecting the stats json:
    system_logs = []
    flamegraph_logs = []
    yourkit_logs = []
    log_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'logs')
    flamegraph_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'flamegraph')
    yourkit_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'yourkit')

    # Create a stats summary file without voluminous interval data
    if os.path.isfile(stats_path):
        with open(stats_path) as stats:
            stats = json.loads(stats.read())
            for rev in stats['revisions']:
                system_logs.append(os.path.join(log_dir, "{name}.tar.gz".format(name=rev['last_log'])))
                fg_path = os.path.join(flamegraph_dir, "{name}.tar.gz".format(name=rev['last_log']))
                yourkit_path = os.path.join(yourkit_dir, "{name}.tar.gz".format(name=rev['last_log']))
                if os.path.exists(fg_path):
                    flamegraph_logs.append(fg_path)
                if os.path.exists(yourkit_path):
                    yourkit_logs.append(yourkit_path)
            with open(summary_path, 'w') as summary:
                hadStats = False
                for op in stats['stats']:
                    if op['type'] == 'stress':
                        try:
                            del op['intervals']
                            hadStats = True
                        except KeyError:
                            pass
                    try:
                        del op['output']
                    except KeyError:
                        pass
                if hadStats:
                    json.dump(obj=stats, fp=summary, sort_keys=True, indent=4, separators=(',', ': '))

    # Make a new tarball containing all the revision logs:
    tmptardir = tempfile.mkdtemp()
    try:
        job_log_dir = os.path.join(tmptardir, 'cassandra_logs.{test_id}'.format(test_id=job['test_id']))
        os.mkdir(job_log_dir)
        for x, syslog in enumerate(system_logs, 1):
            with tarfile.open(syslog) as tar:
                tar.extractall(job_log_dir)
                os.rename(os.path.join(job_log_dir, tar.getnames()[0]),
                          os.path.join(job_log_dir, 'revision_{x:02d}'.format(x=x)))
        system_logs_path = os.path.join(job_dir, 'cassandra_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
        with tarfile.open(system_logs_path, 'w:gz') as tar:
            with cd(tmptardir):
                tar.add('cassandra_logs.{test_id}'.format(test_id=job['test_id']))
        assert os.path.exists(system_logs_path)
    finally:
        shutil.rmtree(tmptardir)

    # Make a new tarball containing all the flamegraph logs and data
    if flamegraph_logs:
        tmptardir = tempfile.mkdtemp()
        try:
            flamegraph_tmp_dir = os.path.join(tmptardir, 'flamegraph_logs.{test_id}'.format(test_id=job['test_id']))
            os.mkdir(flamegraph_tmp_dir)
            for x, flamegraph in enumerate(flamegraph_logs, 1):
                with tarfile.open(flamegraph) as tar:
                    tar.extractall(flamegraph_tmp_dir)
                    tmp_dir = os.path.join(flamegraph_tmp_dir, tar.getnames()[0])
                    # Copy all flamegraphs as artifacts
                    for node_dir in os.listdir(tmp_dir):
                        glob_match = os.path.join(os.path.join(tmp_dir, node_dir), '*.svg')
                        graphs = glob.glob(glob_match)
                        for graph in graphs:
                            graph_name = os.path.basename(graph).replace(
                                'flamegraph_', 'flamegraph_{}_{}_'.format(job['test_id'], node_dir))
                            graph_dst_filename = os.path.join(job_dir, graph_name)
                            shutil.copyfile(graph, graph_dst_filename)
                    os.rename(tmp_dir, os.path.join(flamegraph_tmp_dir, 'revision_{x:02d}'.format(x=x)))
            flamegraph_job_path = os.path.join(job_dir, 'flamegraph_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
            with tarfile.open(flamegraph_job_path, 'w:gz') as tar:
                with cd(tmptardir):
                    tar.add('flamegraph_logs.{test_id}'.format(test_id=job['test_id']))
            assert os.path.exists(flamegraph_job_path)
        finally:
            shutil.rmtree(tmptardir)

    # Make a new tarball containing all the yourkit data
    if yourkit_logs:
        tmptardir = tempfile.mkdtemp()
        try:
            yourkit_tmp_dir = os.path.join(tmptardir, 'yourkit.{test_id}'.format(test_id=job['test_id']))
            os.mkdir(yourkit_tmp_dir)
            for x, yourkit in enumerate(yourkit_logs, 1):
                with tarfile.open(yourkit) as tar:
                    tar.extractall(yourkit_tmp_dir)
                    tmp_dir = os.path.join(yourkit_tmp_dir, tar.getnames()[0])
                    os.rename(tmp_dir, os.path.join(yourkit_tmp_dir, 'revision_{x:02d}'.format(x=x)))
            yourkit_job_path = os.path.join(job_dir, 'yourkit.{test_id}.tar.gz'.format(test_id=job['test_id']))
            with tarfile.open(yourkit_job_path, 'w:gz') as tar:
                with cd(tmptardir):
                    tar.add('yourkit.{test_id}'.format(test_id=job['test_id']))
            assert os.path.exists(yourkit_job_path)
        finally:
            shutil.rmtree(tmptardir)

    ## Stream artifacts
    ## Write final job status to 0.job_status file
    final_status = 'local_complete'
    try:
        # Stream artifacts:
        self.stream_artifacts(job['test_id'])
        if self.__ws_client.in_sync():
            final_status = 'server_complete'
        # Spot check stats to ensure it has the data it should
        # contain. Raises JobFailure if something's amiss.
        try:
            self.__spot_check_stats(job, stats_path)
        except JobFailure, e:
            if final_status == 'server_complete':
                final_status = 'server_fail'
            else:
                final_status = 'local_fail'
            raise
    finally:
        with open(os.path.join(job_dir, '0.job_status'), 'w') as f:
            f.write(final_status)
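# perform_job() wraps stress_proc.readline() in a timeout() context manager and
# builds its tarballs inside cd(); neither helper appears in this section. A
# minimal sketch of both, assuming a SIGALRM-based timeout (Unix, main thread
# only) and a plain chdir/restore for cd():
import contextlib
import os
import signal


class TimeoutError(Exception):
    # Hypothetical: raised by the timeout() context manager sketched below.
    pass


@contextlib.contextmanager
def timeout(seconds):
    # Arm an alarm for `seconds`; if it fires before the block finishes,
    # the handler raises TimeoutError inside the block.
    def _handler(signum, frame):
        raise TimeoutError()
    old_handler = signal.signal(signal.SIGALRM, _handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        signal.alarm(0)
        signal.signal(signal.SIGALRM, old_handler)


@contextlib.contextmanager
def cd(path):
    # Temporarily chdir into `path`, restoring the previous working directory
    # even if the block raises.
    old_cwd = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(old_cwd)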
def __good_bye(self):
    """Tell the server we're disconnecting"""
    ##{type:'command', command_id:'llll', action:'good_bye'}
    command = Command.new(self.ws, action="good_bye")
    log.debug("Sending goodbye message to server..")
    command.send(await_response=False)
def perform_job(self, job, ws):
    """Perform a job the server gave us, stream output and artifacts to the given websocket."""
    job = copy.deepcopy(job['test_definition'])
    # Cleanup the job structure according to what stress_compare needs:
    for operation in job['operations']:
        operation['type'] = operation['operation']
        del operation['operation']

    job_dir = os.path.join(os.path.expanduser('~'), '.cstar_perf', 'jobs', job['test_id'])
    mkpath(job_dir)
    stats_path = os.path.join(job_dir, 'stats.{test_id}.json'.format(test_id=job['test_id']))
    stress_log_path = os.path.join(job_dir, 'stress_compare.{test_id}.log'.format(test_id=job['test_id']))
    stress_json = json.dumps(dict(revisions=job['revisions'],
                                  operations=job['operations'],
                                  title=job['title'],
                                  log=stats_path))
    # Create a temporary location to store the stress_compare json file:
    stress_json_path = os.path.join(job_dir, 'test.{test_id}.json'.format(test_id=job['test_id']))
    with open(stress_json_path, 'w') as f:
        f.write(stress_json)

    # Inform the server we will be streaming the console output to them:
    command = Command.new(self.ws, action='stream', test_id=job['test_id'],
                          kind='console', name='console_out',
                          eof=EOF_MARKER, keepalive=KEEPALIVE_MARKER)
    response = self.send(command, assertions={'message': 'ready'})

    # Start a status checking thread.
    # If a user cancels the job after it's marked in_progress, we
    # need to periodically check for that state change and kill
    # our test:
    cancel_checker = JobCancellationTracker(urlparse.urlparse(self.ws_endpoint).netloc, job['test_id'])
    cancel_checker.start()

    # Run stress_compare in a separate process, collecting the
    # output as an artifact:
    try:
        # Run stress_compare with pexpect. subprocess.Popen didn't
        # work due to some kind of tty issue when invoking
        # nodetool.
        stress_proc = pexpect.spawn('cstar_perf_stress {stress_json_path}'.format(stress_json_path=stress_json_path),
                                    timeout=None)
        with open(stress_log_path, 'w') as stress_log:
            while True:
                try:
                    with timeout(25):
                        line = stress_proc.readline()
                        if line == '':
                            break
                        stress_log.write(line)
                        sys.stdout.write(line)
                        self.send(base64.b64encode(line))
                except TimeoutError:
                    self.send(base64.b64encode(KEEPALIVE_MARKER))
    finally:
        cancel_checker.stop()
        self.send(base64.b64encode(EOF_MARKER))
        response = self.receive(response, assertions={'message': 'stream_received', 'done': True})

    # Find the log tarball for each revision by introspecting the stats json:
    system_logs = []
    log_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'logs')
    with open(stats_path) as stats:
        stats = json.loads(stats.read())
        for rev in stats['revisions']:
            system_logs.append(os.path.join(log_dir, "{name}.tar.gz".format(name=rev['last_log'])))

    # Make a new tarball containing all the revision logs:
    tmptardir = tempfile.mkdtemp()
    try:
        job_log_dir = os.path.join(tmptardir, 'cassandra_logs.{test_id}'.format(test_id=job['test_id']))
        os.mkdir(job_log_dir)
        for x, syslog in enumerate(system_logs, 1):
            with tarfile.open(syslog) as tar:
                tar.extractall(job_log_dir)
                os.rename(os.path.join(job_log_dir, tar.getnames()[0]),
                          os.path.join(job_log_dir, 'revision_{x:02d}'.format(x=x)))
        system_logs_path = os.path.join(job_dir, 'cassandra_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
        with tarfile.open(system_logs_path, 'w:gz') as tar:
            with cd(tmptardir):
                tar.add('cassandra_logs.{test_id}'.format(test_id=job['test_id']))
        assert os.path.exists(system_logs_path)
    finally:
        shutil.rmtree(tmptardir)

    ## Stream artifacts
    ## Write final job status to 0.job_status file
    final_status = 'local_complete'
    try:
        # Stream artifacts:
        self.stream_artifacts(job['test_id'])
        if self.__server_synced:
            final_status = 'server_complete'
        # Spot check stats to ensure it has the data it should
        # contain. Raises JobFailure if something's amiss.
        try:
            self.__spot_check_stats(job, stats_path)
        except JobFailure, e:
            if final_status == 'server_complete':
                final_status = 'server_fail'
            else:
                final_status = 'local_fail'
            raise
    finally:
        with open(os.path.join(job_dir, '0.job_status'), 'w') as f:
            f.write(final_status)
def perform_job(self, job, ws):
    """Perform a job the server gave us, stream output and artifacts to the given websocket."""
    job = copy.deepcopy(job['test_definition'])
    # Cleanup the job structure according to what stress_compare needs:
    for operation in job['operations']:
        operation['type'] = operation['operation']
        del operation['operation']

    job_dir = os.path.join(os.path.expanduser('~'), '.cstar_perf', 'jobs', job['test_id'])
    mkpath(job_dir)
    stats_path = os.path.join(job_dir, 'stats.{test_id}.json'.format(test_id=job['test_id']))
    summary_path = os.path.join(job_dir, 'stats_summary.{test_id}.json'.format(test_id=job['test_id']))
    stress_log_path = os.path.join(job_dir, 'stress_compare.{test_id}.log'.format(test_id=job['test_id']))
    stress_json = json.dumps(dict(revisions=job['revisions'],
                                  operations=job['operations'],
                                  title=job['title'],
                                  log=stats_path))
    # Create a temporary location to store the stress_compare json file:
    stress_json_path = os.path.join(job_dir, 'test.{test_id}.json'.format(test_id=job['test_id']))
    with open(stress_json_path, 'w') as f:
        f.write(stress_json)

    # Inform the server we will be streaming the console output to them:
    command = Command.new(self.ws, action='stream', test_id=job['test_id'],
                          kind='console', name='console_out',
                          eof=EOF_MARKER, keepalive=KEEPALIVE_MARKER)
    response = self.send(command, assertions={'message': 'ready'})

    # Start a status checking thread.
    # If a user cancels the job after it's marked in_progress, we
    # need to periodically check for that state change and kill
    # our test:
    cancel_checker = JobCancellationTracker(urlparse.urlparse(self.ws_endpoint).netloc, job['test_id'])
    cancel_checker.start()

    # Run stress_compare in a separate process, collecting the
    # output as an artifact:
    try:
        # Run stress_compare with pexpect. subprocess.Popen didn't
        # work due to some kind of tty issue when invoking
        # nodetool.
        stress_proc = pexpect.spawn('cstar_perf_stress {stress_json_path}'.format(stress_json_path=stress_json_path),
                                    timeout=None)
        with open(stress_log_path, 'w') as stress_log:
            while True:
                try:
                    with timeout(25):
                        line = stress_proc.readline()
                        if line == '':
                            break
                        stress_log.write(line)
                        sys.stdout.write(line)
                        self.send(base64.b64encode(line))
                except TimeoutError:
                    self.send(base64.b64encode(KEEPALIVE_MARKER))
    finally:
        cancel_checker.stop()
        self.send(base64.b64encode(EOF_MARKER))
        response = self.receive(response, assertions={'message': 'stream_received', 'done': True})

    # Find the log tarball for each revision by introspecting the stats json:
    system_logs = []
    log_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'logs')
    with open(stats_path) as stats:
        stats = json.loads(stats.read())
        for rev in stats['revisions']:
            system_logs.append(os.path.join(log_dir, "{name}.tar.gz".format(name=rev['last_log'])))
        with open(summary_path, 'w') as summary:
            for rev in job['revisions']:
                for op_num, op in enumerate(job['operations']):
                    if op['type'] == 'stress':
                        del stats['stats'][op_num]['intervals']
            json.dump(obj=stats, fp=summary, sort_keys=True, indent=4, separators=(',', ': '))

    # Make a new tarball containing all the revision logs:
    tmptardir = tempfile.mkdtemp()
    try:
        job_log_dir = os.path.join(tmptardir, 'cassandra_logs.{test_id}'.format(test_id=job['test_id']))
        os.mkdir(job_log_dir)
        for x, syslog in enumerate(system_logs, 1):
            with tarfile.open(syslog) as tar:
                tar.extractall(job_log_dir)
                os.rename(os.path.join(job_log_dir, tar.getnames()[0]),
                          os.path.join(job_log_dir, 'revision_{x:02d}'.format(x=x)))
        system_logs_path = os.path.join(job_dir, 'cassandra_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
        with tarfile.open(system_logs_path, 'w:gz') as tar:
            with cd(tmptardir):
                tar.add('cassandra_logs.{test_id}'.format(test_id=job['test_id']))
        assert os.path.exists(system_logs_path)
    finally:
        shutil.rmtree(tmptardir)

    ## Stream artifacts
    ## Write final job status to 0.job_status file
    final_status = 'local_complete'
    try:
        # Stream artifacts:
        self.stream_artifacts(job['test_id'])
        if self.__server_synced:
            final_status = 'server_complete'
        # Spot check stats to ensure it has the data it should
        # contain. Raises JobFailure if something's amiss.
        try:
            self.__spot_check_stats(job, stats_path)
        except JobFailure, e:
            if final_status == 'server_complete':
                final_status = 'server_fail'
            else:
                final_status = 'local_fail'
            raise
    finally:
        with open(os.path.join(job_dir, '0.job_status'), 'w') as f:
            f.write(final_status)
def perform_job(self, job):
    """Perform a job the server gave us, stream output and artifacts to the given websocket."""
    job = copy.deepcopy(job['test_definition'])
    # Cleanup the job structure according to what stress_compare needs:
    for operation in job['operations']:
        operation['type'] = operation['operation']
        del operation['operation']

    job_dir = os.path.join(os.path.expanduser('~'), '.cstar_perf', 'jobs', job['test_id'])
    mkpath(job_dir)
    stats_path = os.path.join(job_dir, 'stats.{test_id}.json'.format(test_id=job['test_id']))
    summary_path = os.path.join(job_dir, 'stats_summary.{test_id}.json'.format(test_id=job['test_id']))
    stress_log_path = os.path.join(job_dir, 'stress_compare.{test_id}.log'.format(test_id=job['test_id']))
    stress_json = json.dumps(dict(revisions=job['revisions'],
                                  operations=job['operations'],
                                  title=job['title'],
                                  leave_data=job.get('leave_data', False),
                                  log=stats_path))
    # Create a temporary location to store the stress_compare json file:
    stress_json_path = os.path.join(job_dir, 'test.{test_id}.json'.format(test_id=job['test_id']))
    with open(stress_json_path, 'w') as f:
        f.write(stress_json)

    # Inform the server we will be streaming the console output to them:
    command = Command.new(self.__ws_client.socket(),
                          action='stream',
                          test_id=job['test_id'],
                          kind='console',
                          name="stress_compare.{test_id}.log".format(test_id=job['test_id']),
                          eof=EOF_MARKER,
                          keepalive=KEEPALIVE_MARKER)
    response = self.__ws_client.send(command, assertions={'message': 'ready'})

    # Start a status checking thread.
    # If a user cancels the job after it's marked in_progress, we
    # need to periodically check for that state change and kill
    # our test:
    cancel_checker = JobCancellationTracker(urlparse.urlparse(self.ws_endpoint).netloc, job['test_id'])
    cancel_checker.start()

    # stats file observer:
    # looks for changes to update server with status progress message
    observer = Observer()
    observer.schedule(UpdateServerProgressMessageHandler(job, urlparse.urlparse(self.ws_endpoint).netloc),
                      os.path.join(os.path.expanduser("~"), '.cstar_perf', 'jobs'),
                      recursive=True)
    observer.start()

    # Run stress_compare in a separate process, collecting the
    # output as an artifact:
    try:
        # Run stress_compare with pexpect. subprocess.Popen didn't
        # work due to some kind of tty issue when invoking
        # nodetool.
        stress_proc = pexpect.spawn('cstar_perf_stress {stress_json_path}'.format(stress_json_path=stress_json_path),
                                    timeout=None)
        with open(stress_log_path, 'w') as stress_log:
            while True:
                try:
                    with timeout(25):
                        line = stress_proc.readline()
                        if line == '':
                            break
                        stress_log.write(line)
                        sys.stdout.write(line)
                        self.__ws_client.send(base64.b64encode(line))
                except TimeoutError:
                    self.__ws_client.send(base64.b64encode(KEEPALIVE_MARKER))
    finally:
        cancel_checker.stop()
        observer.stop()
        self.__ws_client.send(base64.b64encode(EOF_MARKER))
        response = self.__ws_client.receive(response, assertions={'message': 'stream_received', 'done': True})

    # Find the log tarball for each revision by introspecting the stats json:
    system_logs = []
    flamegraph_logs = []
    yourkit_logs = []
    log_dir = CSTAR_PERF_LOGS_DIR
    flamegraph_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'flamegraph')
    yourkit_dir = os.path.join(os.path.expanduser("~"), '.cstar_perf', 'yourkit')

    # Create a stats summary file without voluminous interval data
    if os.path.isfile(stats_path):
        with open(stats_path) as stats:
            stats = json.loads(stats.read())
            for rev in stats['revisions']:
                last_log_rev_id = rev.get('last_log')
                if last_log_rev_id:
                    system_logs.append(os.path.join(log_dir, "{name}.tar.gz".format(name=last_log_rev_id)))
                    fg_path = os.path.join(flamegraph_dir, "{name}.tar.gz".format(name=last_log_rev_id))
                    yourkit_path = os.path.join(yourkit_dir, "{name}.tar.gz".format(name=last_log_rev_id))
                    if os.path.exists(fg_path):
                        flamegraph_logs.append(fg_path)
                    if os.path.exists(yourkit_path):
                        yourkit_logs.append(yourkit_path)
            with open(summary_path, 'w') as summary:
                hadStats = False
                for op in stats['stats']:
                    if op['type'] == 'stress':
                        try:
                            del op['intervals']
                            hadStats = True
                        except KeyError:
                            pass
                    try:
                        del op['output']
                    except KeyError:
                        pass
                if hadStats:
                    json.dump(obj=stats, fp=summary, sort_keys=True, indent=4, separators=(',', ': '))

    # Make a new tarball containing all the revision logs:
    tmptardir = tempfile.mkdtemp()
    try:
        startup_log_tarball = self._maybe_get_startup_log_tarball(job['test_id'], log_dir)
        if startup_log_tarball:
            system_logs.append(startup_log_tarball)
        job_log_dir = os.path.join(tmptardir, 'cassandra_logs.{test_id}'.format(test_id=job['test_id']))
        os.mkdir(job_log_dir)
        for x, syslog in enumerate(system_logs, 1):
            with tarfile.open(syslog) as tar:
                tar.extractall(job_log_dir)
                os.rename(os.path.join(job_log_dir, tar.getnames()[0]),
                          os.path.join(job_log_dir, 'revision_{x:02d}'.format(x=x)))
        system_logs_path = os.path.join(job_dir, 'cassandra_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
        with tarfile.open(system_logs_path, 'w:gz') as tar:
            with cd(tmptardir):
                tar.add('cassandra_logs.{test_id}'.format(test_id=job['test_id']))
        assert os.path.exists(system_logs_path)
    finally:
        shutil.rmtree(tmptardir)

    # Make a new tarball containing all the flamegraph logs and data
    if flamegraph_logs:
        tmptardir = tempfile.mkdtemp()
        try:
            flamegraph_tmp_dir = os.path.join(tmptardir, 'flamegraph_logs.{test_id}'.format(test_id=job['test_id']))
            os.mkdir(flamegraph_tmp_dir)
            for x, flamegraph in enumerate(flamegraph_logs, 1):
                with tarfile.open(flamegraph) as tar:
                    tar.extractall(flamegraph_tmp_dir)
                    tmp_dir = os.path.join(flamegraph_tmp_dir, tar.getnames()[0])
                    # Copy all flamegraphs as artifacts
                    for node_dir in os.listdir(tmp_dir):
                        glob_match = os.path.join(os.path.join(tmp_dir, node_dir), '*.svg')
                        graphs = glob.glob(glob_match)
                        for graph in graphs:
                            graph_name = os.path.basename(graph).replace(
                                'flamegraph_', 'flamegraph_{}_{}_'.format(job['test_id'], node_dir))
                            graph_dst_filename = os.path.join(job_dir, graph_name)
                            shutil.copyfile(graph, graph_dst_filename)
                    os.rename(tmp_dir, os.path.join(flamegraph_tmp_dir, 'revision_{x:02d}'.format(x=x)))
            flamegraph_job_path = os.path.join(job_dir, 'flamegraph_logs.{test_id}.tar.gz'.format(test_id=job['test_id']))
            with tarfile.open(flamegraph_job_path, 'w:gz') as tar:
                with cd(tmptardir):
                    tar.add('flamegraph_logs.{test_id}'.format(test_id=job['test_id']))
            assert os.path.exists(flamegraph_job_path)
        finally:
            shutil.rmtree(tmptardir)

    # Make a new tarball containing all the yourkit data
    if yourkit_logs:
        tmptardir = tempfile.mkdtemp()
        try:
            yourkit_tmp_dir = os.path.join(tmptardir, 'yourkit.{test_id}'.format(test_id=job['test_id']))
            os.mkdir(yourkit_tmp_dir)
            for x, yourkit in enumerate(yourkit_logs, 1):
                with tarfile.open(yourkit) as tar:
                    tar.extractall(yourkit_tmp_dir)
                    tmp_dir = os.path.join(yourkit_tmp_dir, tar.getnames()[0])
                    os.rename(tmp_dir, os.path.join(yourkit_tmp_dir, 'revision_{x:02d}'.format(x=x)))
            yourkit_job_path = os.path.join(job_dir, 'yourkit.{test_id}.tar.gz'.format(test_id=job['test_id']))
            with tarfile.open(yourkit_job_path, 'w:gz') as tar:
                with cd(tmptardir):
                    tar.add('yourkit.{test_id}'.format(test_id=job['test_id']))
            assert os.path.exists(yourkit_job_path)
        finally:
            shutil.rmtree(tmptardir)

    ## Stream artifacts
    ## Write final job status to 0.job_status file
    final_status = 'local_complete'
    try:
        # Stream artifacts:
        self.stream_artifacts(job['test_id'])
        if self.__ws_client.in_sync():
            final_status = 'server_complete'
        # Spot check stats to ensure it has the data it should
        # contain. Raises JobFailure if something's amiss.
        try:
            self.__spot_check_stats(job, stats_path)
        except JobFailure, e:
            if final_status == 'server_complete':
                final_status = 'server_fail'
            else:
                final_status = 'local_fail'
            raise
    finally:
        with open(os.path.join(job_dir, '0.job_status'), 'w') as f:
            f.write(final_status)
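# Roughly how the snippets above fit together, as a hypothetical driver loop.
# The public wrapper names used here (get_work, job_done, good_bye) are
# assumptions made for illustration; the project's real run loop is not shown
# in this section.
def run_forever(client):
    """Fetch jobs and run them until interrupted, saying goodbye on exit."""
    try:
        while True:
            job = client.get_work()
            try:
                client.perform_job(job)
                client.job_done(job['test_id'], status='completed')
            except Exception as e:
                client.job_done(job['test_id'], status='failed', message=str(e))
    finally:
        client.good_bye()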