def test_get_results(self):
    """Check that a sample reports every computed file associated with it.

    Builds one sample, one computational result and two computed files
    (one smashable, one not), links all of them to the sample, and
    expects ``get_result_files()`` to return both files.
    """
    sample = Sample()
    sample.accession_code = 'GSM45588'
    sample.save()

    result = ComputationalResult()
    result.save()

    # (filename, is_smashable) for each fixture file; both files share the
    # same result and the same size.
    file_specs = (
        ("oh_boy.txt", True),
        ("gee_whiz.bmp", False),
    )
    fixture_files = []
    for filename, smashable in file_specs:
        fixture_file = ComputedFile()
        fixture_file.filename = filename
        fixture_file.result = result
        fixture_file.size_in_bytes = 123
        fixture_file.is_smashable = smashable
        fixture_file.save()
        fixture_files.append(fixture_file)

    result_link = SampleResultAssociation()
    result_link.sample = sample
    result_link.result = result
    result_link.save()

    for fixture_file in fixture_files:
        file_link = SampleComputedFileAssociation()
        file_link.sample = sample
        file_link.computed_file = fixture_file
        file_link.save()

    self.assertEqual(sample.get_result_files().count(), 2)
def _run_salmontools(job_context: Dict) -> Dict:
    """Run Salmontools to extract unmapped genes.

    Reads from ``job_context``:
        output_directory: directory holding ``aux_info/unmapped_names.txt``.
        salmontools_directory: directory that receives the SalmonTools
            output (files are prefixed with ``unmapped_by_salmon``).
        salmontools_archive: path of the ``.tar.gz`` archive to create from
            ``salmontools_directory``.
        input_file_path: the (first) input reads file.
        input_file_path_2: optional; when present the run is paired-end
            (``-1``/``-2``), otherwise single-end (``-r``).
        job_id, job, pipeline, sample, computed_files: used for logging and
            for recording the result.

    Writes to ``job_context``:
        success: True when SalmonTools ran and its output was archived and
            recorded, False otherwise.
        result: the saved ComputationalResult (success path only).
        computed_files: gets the new archive ComputedFile appended.
        job.failure_reason: set on failure.

    Returns:
        The updated ``job_context``, or whatever
        ``utils.handle_processor_exception`` returns when the processor
        lookup fails.
    """
    logger.debug("Running SalmonTools ...")

    unmapped_filename = os.path.join(
        job_context["output_directory"], "aux_info/unmapped_names.txt")
    output_prefix = os.path.join(
        job_context["salmontools_directory"], "unmapped_by_salmon")

    # Build the argument vector directly rather than formatting a string and
    # splitting it on whitespace: that way paths containing spaces or braces
    # reach salmontools as a single, unmodified argument.
    command = [
        "salmontools", "extract-unmapped",
        "-u", unmapped_filename,
        "-o", output_prefix,
    ]
    if "input_file_path_2" in job_context:
        command += [
            "-1", str(job_context["input_file_path"]),
            "-2", str(job_context["input_file_path_2"]),
        ]
    else:
        command += ["-r", str(job_context["input_file_path"])]

    # Human-readable form, used for logging and stored on the result.
    command_str = " ".join(command)

    start_time = timezone.now()
    logger.debug(
        "Running the following SalmonTools command: %s",
        command_str,
        processor_job=job_context["job_id"],
    )
    completed_command = subprocess.run(command,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
    end_time = timezone.now()

    # As of SalmonTools 0.1.0, completed_command.returncode is always 0,
    # (even if error happens).  completed_command.stderr is not totally
    # reliable either, because it will output the following line even
    # when the execution succeeds:
    #   "There were <N> unmapped reads\n"
    # in which "<N>" is the number of lines in input unmapped_names.txt.
    #
    # As a workaround, we are using a regular expression here to test
    # the status of SalmonTools execution.  Any text in stderr that is
    # not in the above format is treated as error message.
    status_str = completed_command.stderr.decode().strip()
    success_pattern = r"^There were \d+ unmapped reads$"

    if not re.match(success_pattern, status_str):
        # error in salmontools
        logger.error(
            "Shell call to salmontools failed with error message: %s",
            status_str,
            processor_job=job_context["job_id"],
        )
        job_context["job"].failure_reason = (
            "Shell call to salmontools failed because: " + status_str)
        job_context["success"] = False
        return job_context

    # Zip up the output of salmontools
    try:
        with tarfile.open(job_context["salmontools_archive"], "w:gz") as tar:
            tar.add(job_context["salmontools_directory"], arcname=os.sep)
    except Exception:
        logger.exception(
            "Exception caught while zipping processed directory %s",
            job_context["salmontools_directory"],
            processor_job=job_context["job_id"],
        )
        failure_template = "Exception caught while zipping salmontools directory {}"
        job_context["job"].failure_reason = failure_template.format(
            job_context["salmontools_archive"])
        job_context["success"] = False
        return job_context

    result = ComputationalResult()
    result.commands.append(command_str)
    result.time_start = start_time
    result.time_end = end_time
    result.is_ccdl = True

    processor_key = "SALMONTOOLS"
    try:
        result.processor = utils.find_processor(processor_key)
    except Exception as e:
        return utils.handle_processor_exception(job_context, processor_key, e)

    result.save()
    job_context["pipeline"].steps.append(result.id)

    result_assoc = SampleResultAssociation()
    result_assoc.sample = job_context["sample"]
    result_assoc.result = result
    result_assoc.save()

    computed_file = ComputedFile()
    computed_file.filename = os.path.basename(
        job_context["salmontools_archive"])
    computed_file.absolute_file_path = job_context["salmontools_archive"]
    computed_file.calculate_sha1()
    computed_file.calculate_size()
    computed_file.is_public = True
    computed_file.is_smashable = False
    computed_file.is_qc = True
    computed_file.result = result
    computed_file.save()
    job_context["computed_files"].append(computed_file)

    file_assoc = SampleComputedFileAssociation()
    file_assoc.sample = job_context["sample"]
    file_assoc.computed_file = computed_file
    file_assoc.save()

    job_context["result"] = result
    job_context["success"] = True
    return job_context