def _covert_to_cwl_format(val):
    file_obj = copy.deepcopy(val)
    location = file_obj.pop('location', None)
    if location:
        try:
            file_db_object = FileProcessor.get_file_obj(location)
        except FileHelperException as e:
            raise PortProcessorException('File %s not found' % location)
        path = file_db_object.path
        path_obj = Path(path)
        checksum = FileProcessor.get_file_checksum(file_db_object)
        if checksum:
            file_obj['checksum'] = checksum
        size = FileProcessor.get_file_size(file_db_object)
        if size:
            file_obj['size'] = size
        file_obj['basename'] = path_obj.name
        file_obj['nameext'] = path_obj.suffix
        file_obj['nameroot'] = path_obj.stem
        file_obj['path'] = path
    secondary_files = file_obj.pop('secondaryFiles', [])
    secondary_files_value = PortProcessor.process_files(secondary_files,
                                                        PortAction.CONVERT_TO_CWL_FORMAT)
    if secondary_files_value:
        file_obj['secondaryFiles'] = secondary_files_value
    return file_obj
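# Illustrative only: a rough sketch of the value _covert_to_cwl_format builds for
# a registered file. The path and hash below are hypothetical; 'checksum' and
# 'size' appear only when stored on the File record, 'secondaryFiles' only when
# the input carried any, and other keys (e.g. 'class') are carried over from the
# input value. Note that 'location' is popped and replaced by the resolved
# filesystem 'path'.
example_cwl_file_value = {
    "class": "File",
    "path": "/work/outputs/sample.maf",
    "basename": "sample.maf",
    "nameroot": "sample",
    "nameext": ".maf",
    "checksum": "sha1$0123456789abcdef",
    "size": 123456,
}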
def _register_file(val, size, group_id, metadata, file_list):
    file_obj = copy.deepcopy(val)
    file_obj.pop("basename", None)
    file_obj.pop("nameroot", None)
    file_obj.pop("nameext", None)
    uri = file_obj.pop("location", None)
    checksum = file_obj.pop("checksum", None)
    try:
        file_obj_db = FileProcessor.create_file_obj(uri, size, checksum, group_id, metadata)
    except FileConflictException as e:
        logger.warning(str(e))
        file_obj_db = FileProcessor.get_file_obj(uri)
        # TODO: Check what to do in case the file already exists in the DB. Note: This should never happen
        # raise PortProcessorException(e)
    secondary_files = file_obj.pop("secondaryFiles", [])
    secondary_file_list = []
    secondary_files_obj = PortProcessor.process_files(secondary_files,
                                                      PortAction.REGISTER_OUTPUT_FILES,
                                                      group_id=group_id,
                                                      metadata=metadata,
                                                      file_list=secondary_file_list)
    if secondary_files_obj:
        file_obj["secondaryFiles"] = secondary_files_obj
    file_obj["location"] = FileProcessor.get_bid_from_file(file_obj_db)
    if file_list is not None:
        file_list.append("bid://%s" % FileProcessor.get_bid_from_file(file_obj_db))
        file_list.extend([f["location"] for f in secondary_files_obj])
    return file_obj
def to_db(self):
    if self.port_object:
        self.port_object.name = self.name
        self.port_object.port_type = self.port_type
        self.port_object.schema = self.schema
        self.port_object.secondary_files = self.secondary_files
        self.port_object.db_value = self.db_value
        self.port_object.value = self.value
        self.port_object.save()
        self.port_object.files.set([FileProcessor.get_file_obj(v) for v in self.files])
        self.port_object.notify = self.notify
        self.port_object.save()
    else:
        try:
            run_object = Run.objects.get(id=self.run_id)
        except Run.DoesNotExist:
            raise PortObjectConstructException(
                "Port save failed. Run with id: %s doesn't exist." % self.run_id)
        new_port = Port(
            run=run_object,
            name=self.name,
            port_type=self.port_type,
            schema=self.schema,
            secondary_files=self.secondary_files,
            db_value=self.db_value,
            value=self.value,
            notify=self.name in run_object.notify_for_outputs,
        )
        new_port.save()
        new_port.files.set([FileProcessor.get_file_obj(v) for v in self.files])
        new_port.save()
        self.port_object = new_port
def get_files_from_run(r):
    files = list()
    inp_port = Port.objects.filter(run_id=r.id, name='pair').first()
    for key in ('R1', 'R2', 'zR1', 'zR2'):
        for p in inp_port.db_value[0][key]:
            files.append(FileProcessor.get_file_path(p['location']))
    return files
def _fix_locations_in_db(val, file_list):
    """
    Temporary method for fixing values in DB
    :param val: file value (dict) whose location should be normalized
    :param file_list: optional list collecting the bid:// URIs of fixed files
    :return: fixed file value
    """
    file_obj = copy.deepcopy(val)
    location = val.get('location')
    if not location:
        location = val.get('path')
    if not location:
        print("Couldn't fix value: %s. File doesn't exist" % file_obj)
        return file_obj
    if location.startswith('/'):
        location = 'juno://%s' % location
    elif PortProcessor.is_uuid(location):
        location = 'bid://%s' % location
    elif not location.startswith('juno://') and not location.startswith('bid://'):
        print("Couldn't fix value: %s" % file_obj)
        return file_obj
    try:
        bid = FileProcessor.get_file_id(location)
    except FileHelperException as e:
        print("Couldn't fix value: %s. File doesn't exist" % file_obj)
        return file_obj
    file_obj['location'] = 'bid://%s' % bid
    if file_obj.get('path'):
        file_obj.pop('path')
    if file_list is not None:
        file_list.append('bid://%s' % bid)
    return file_obj
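# Illustrative only: how _fix_locations_in_db normalizes the incoming location
# before resolving it through FileProcessor.get_file_id (UUID and paths are
# hypothetical). Whatever spelling comes in, the value written back onto the
# object is always 'bid://<file id>'.
normalization_examples = {
    "/juno/work/sample.bam": "juno:///juno/work/sample.bam",                                   # absolute path
    "9a7c1d2e-3f4b-4c5d-8e9f-0a1b2c3d4e5f": "bid://9a7c1d2e-3f4b-4c5d-8e9f-0a1b2c3d4e5f",      # bare UUID
    "juno:///juno/work/sample.bam": "juno:///juno/work/sample.bam",                            # already a URI, used as-is
}
# Anything else, or a file that can't be resolved, is reported with
# "Couldn't fix value" and returned unchanged.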
def test_create_file_obj_bad_file_group(self):
    file_group_id = str(uuid.uuid4())
    with self.assertRaises(Exception) as context:
        file_obj = FileProcessor.create_file_obj(
            "file:///path/to/file.unknown_data_type",
            123345,
            "sha1$calculated checksum",
            file_group_id,
            {}
        )
    self.assertTrue("Invalid FileGroup id: %s" % file_group_id in str(context.exception))
def _send_as_notification(val, job_group):
    uri = val.get('location')
    path = FileProcessor.parse_path_from_uri(uri)
    file_name = os.path.basename(path)
    if job_group:
        event = UploadAttachmentEvent(str(job_group.id), file_name, path, download=True)
        send_notification.delay(event.to_dict())
    else:
        logger.info("Can't upload file:%s. JobGroup not specified", path)
    return val
def ready(self):
    for p in self.inputs:
        CWLPortObject.ready(p)
    samples = set()
    for p in self.inputs:
        for f in p.files:
            file_obj = FileProcessor.get_file_obj(f)
            if file_obj.sample:
                samples.add(file_obj.sample)
    self.samples = list(samples)
    for p in self.outputs:
        CWLPortObject.ready(p)
    self.status = RunStatus.READY
def _register_file(val, size, group_id, metadata, file_list):
    file_obj = copy.deepcopy(val)
    file_obj.pop("basename", None)
    file_obj.pop("nameroot", None)
    file_obj.pop("nameext", None)
    uri = file_obj.pop("location", None)
    checksum = file_obj.pop("checksum", None)
    try:
        file_obj_db = FileProcessor.create_file_obj(uri, size, checksum, group_id, metadata)
    except FileConflictException as e:
        logger.warning(str(e))
        # TODO: Check what to do in case the file already exists in the DB.
        file_obj_db = FileProcessor.get_file_obj(uri)
        FileProcessor.update_file(file_obj_db, file_obj_db.path, metadata)
    secondary_files = file_obj.pop("secondaryFiles", [])
    secondary_file_list = []
    secondary_files_obj = PortProcessor.process_files(
        secondary_files,
        PortAction.REGISTER_OUTPUT_FILES,
        group_id=group_id,
        metadata=metadata,
        file_list=secondary_file_list,
    )
    if secondary_files_obj:
        file_obj["secondaryFiles"] = secondary_files_obj
    file_obj["location"] = FileProcessor.get_bid_from_file(file_obj_db)
    if file_list is not None:
        file_list.append("bid://%s" % FileProcessor.get_bid_from_file(file_obj_db))
        file_list.extend([f["location"] for f in secondary_files_obj])
    return file_obj
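# Illustrative only: roughly what _register_file returns and what it appends to
# file_list for a newly registered output, using a hypothetical file id. Whether
# 'location' holds a bare id or a full URI depends on what
# FileProcessor.get_bid_from_file returns; file_list always collects bid:// URIs
# (plus the locations of any registered secondary files), and 'secondaryFiles'
# is set only when secondary files were registered.
registered_value = {
    "class": "File",
    "location": "5e6f7a8b-9c0d-4e1f-a2b3-c4d5e6f7a8b9",  # FileProcessor.get_bid_from_file(file_obj_db)
}
collected_file_list = ["bid://5e6f7a8b-9c0d-4e1f-a2b3-c4d5e6f7a8b9"]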
def populate_run_samples(apps, _):
    Run = apps.get_model('runner', 'Run')
    for run in Run.objects.all():
        samples = set()
        try:
            run_obj = RunObject.from_db(run.id)
        except Exception as e:
            print("Run %s can't be migrated" % str(run.id))
            continue
        for p in run_obj.inputs:
            for f in p.files:
                file_obj = FileProcessor.get_file_obj(f)
                if file_obj.sample:
                    samples.add(file_obj.sample)
        run_obj.samples = list(samples)
        run_obj.to_db()
def from_db(cls, port_id):
    try:
        port = Port.objects.get(id=port_id)
    except Port.DoesNotExist:
        raise PortObjectConstructException("Port with id: %s doesn't exist." % port_id)
    return cls(str(port.run.id),
               port.name,
               port.port_type,
               port.schema,
               port.secondary_files,
               port.db_value,
               port.value,
               [FileProcessor.get_bid_from_file(f) for f in port.files.all()],
               port_id=port_id,
               notify=port.notify)
def _convert_to_path(val):
    file_obj = copy.deepcopy(val)
    location = file_obj.pop("location", None)
    if not location and val.get("contents"):
        logger.debug("Processing file literal %s", str(val))
        return val
    try:
        path = FileProcessor.get_file_path(location)
    except FileHelperException as e:
        raise PortProcessorException("File %s not found" % location)
    secondary_files = file_obj.pop("secondaryFiles", [])
    secondary_files_value = PortProcessor.process_files(secondary_files, PortAction.CONVERT_TO_PATH)
    if secondary_files_value:
        file_obj["secondaryFiles"] = secondary_files_value
    file_obj["path"] = path
    return file_obj
def _update_location_to_bid(val, file_list):
    file_obj = copy.deepcopy(val)
    location = val.get('location')
    if not location and val.get('contents'):
        logger.debug("Processing file literal %s", str(val))
        return val
    bid = FileProcessor.get_file_id(location)
    file_obj['location'] = 'bid://%s' % bid
    secondary_files = file_obj.pop('secondaryFiles', [])
    secondary_file_list = []
    secondary_files_obj = PortProcessor.process_files(secondary_files,
                                                      PortAction.CONVERT_TO_BID,
                                                      file_list=secondary_file_list)
    if secondary_files_obj:
        file_obj['secondaryFiles'] = secondary_files_obj
    if file_obj.get('path'):
        file_obj.pop('path')
    if file_list is not None:
        file_list.append('bid://%s' % bid)
        file_list.extend([f['location'] for f in secondary_files_obj])
    return file_obj
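# Illustrative before/after for _update_location_to_bid, using a hypothetical
# UUID: 'path' is dropped, 'location' is rewritten to a bid:// URI, and the same
# URI is appended to file_list when a list is passed in.
value_before = {"class": "File", "location": "juno:///work/sample.bam", "path": "/work/sample.bam"}
value_after = {"class": "File", "location": "bid://9a7c1d2e-3f4b-4c5d-8e9f-0a1b2c3d4e5f"}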
def test_run_complete_job(
    self, mock_populate_job_group_notifier, mock_get_pipeline, memcache_task_lock, send_notification
):
    with open("runner/tests/run/pair-workflow.cwl", "r") as f:
        app = json.load(f)
    with open("runner/tests/run/inputs.json", "r") as f:
        inputs = json.load(f)
    mock_populate_job_group_notifier.return_value = None
    mock_get_pipeline.return_value = app
    memcache_task_lock.return_value = True
    send_notification.return_value = False

    run = RunObjectFactory.from_definition(str(self.run.id), inputs)
    run.to_db()
    operator_run = OperatorRun.objects.first()
    operator_run.runs.add(run.run_obj)
    num_completed_runs = operator_run.num_completed_runs
    complete_job(run.run_id, self.outputs)
    operator_run.refresh_from_db()
    self.assertEqual(operator_run.num_completed_runs, num_completed_runs + 1)

    run_obj = RunObjectFactory.from_db(run.run_id)
    file_obj = File.objects.filter(path=self.outputs["maf"]["location"].replace("file://", "")).first()
    run_obj.to_db()
    for out in run_obj.outputs:
        if out.name == "maf":
            self.assertEqual(out.value["location"], self.outputs["maf"]["location"])
            self.assertEqual(FileProcessor.get_bid_from_file(file_obj), out.db_value["location"])

    port = Port.objects.filter(run_id=run_obj.run_id, name="bams").first()
    self.assertEqual(len(port.files.all()), 4)
    expected_result = (
        "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bam",
        "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bai",
        "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bam",
        "/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bai",
    )
    self.assertTrue(port.files.all()[0].path in expected_result)
    self.assertTrue(port.files.all()[1].path in expected_result)
    self.assertTrue(port.files.all()[2].path in expected_result)
    self.assertTrue(port.files.all()[3].path in expected_result)
def test_run_complete_job(self, mock_get_pipeline):
    with open('runner/tests/run/pair-workflow.cwl', 'r') as f:
        app = json.load(f)
    with open('runner/tests/run/inputs.json', 'r') as f:
        inputs = json.load(f)
    mock_get_pipeline.return_value = app

    run = RunObject.from_cwl_definition(str(self.run.id), inputs)
    run.to_db()
    operator_run = OperatorRun.objects.first()
    operator_run.runs.add(run.run_obj)
    num_completed_runs = operator_run.num_completed_runs
    complete_job(run.run_id, self.outputs)
    operator_run.refresh_from_db()
    self.assertEqual(operator_run.num_completed_runs, num_completed_runs + 1)

    run_obj = RunObject.from_db(run.run_id)
    file_obj = File.objects.filter(path=self.outputs['maf']['location'].replace('file://', '')).first()
    run_obj.to_db()
    for out in run_obj.outputs:
        if out.name == 'maf':
            self.assertEqual(out.value['location'], self.outputs['maf']['location'])
            self.assertEqual(FileProcessor.get_bid_from_file(file_obj), out.db_value['location'])

    port = Port.objects.filter(run_id=run_obj.run_id, name='bams').first()
    self.assertEqual(len(port.files.all()), 4)
    expected_result = (
        '/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bam',
        '/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_1.rg.md.abra.printreads.bai',
        '/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bam',
        '/output/argos_pair_workflow/425194f6-a974-4c2f-995f-f27d7ba54ddc/outputs/test_2.rg.md.abra.printreads.bai'
    )
    self.assertTrue(port.files.all()[0].path in expected_result)
    self.assertTrue(port.files.all()[1].path in expected_result)
    self.assertTrue(port.files.all()[2].path in expected_result)
    self.assertTrue(port.files.all()[3].path in expected_result)
def get_jobs(self):
    files = FileRepository.filter(queryset=self.files,
                                  metadata={"requestId": self.request_id,
                                            "igocomplete": True})
    argos_jobs = list()
    cnt_tumors = FileRepository.filter(queryset=self.files,
                                       metadata={"requestId": self.request_id,
                                                 "tumorOrNormal": "Tumor",
                                                 "igocomplete": True}).count()
    if cnt_tumors == 0:
        cant_do = CantDoEvent(self.job_group_notifier_id).to_dict()
        send_notification.delay(cant_do)
        all_normals_event = SetLabelEvent(self.job_group_notifier_id, "all_normals").to_dict()
        send_notification.delay(all_normals_event)
        return argos_jobs

    data = list()
    for f in files:
        sample = dict()
        sample["id"] = f.file.id
        sample["path"] = f.file.path
        sample["file_name"] = f.file.file_name
        sample["metadata"] = f.metadata
        data.append(sample)

    files = list()
    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample["metadata"]["sampleId"]
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)
    for igo_id in igo_id_group:
        samples.append(build_sample(igo_id_group[igo_id]))

    argos_inputs, error_samples = construct_argos_jobs(samples)
    number_of_inputs = len(argos_inputs)
    sample_pairing = ""
    sample_mapping = ""
    pipeline = self.get_pipeline_id()
    try:
        pipeline_obj = Pipeline.objects.get(id=pipeline)
    except Pipeline.DoesNotExist:
        pass

    for i, job in enumerate(argos_inputs):
        tumor_sample_name = job["pair"][0]["ID"]
        normal_sample_name = job["pair"][1]["ID"]
        # collect a mapping line for every fastq/bam referenced by the pair
        for sample_name, pair_sample, keys in (
            (tumor_sample_name, job["pair"][0], ("R1", "R2", "zR1", "zR2")),
            (normal_sample_name, job["pair"][1], ("R1", "R2", "zR1", "zR2", "bam")),
        ):
            for key in keys:
                for p in pair_sample[key]:
                    filepath = FileProcessor.parse_path_from_uri(p["location"])
                    if filepath not in files:
                        sample_mapping += "\t".join([sample_name, filepath]) + "\n"
                        files.append(filepath)
        name = "ARGOS %s, %i of %i" % (self.request_id, i + 1, number_of_inputs)
        assay = job["assay"]
        pi = job["pi"]
        pi_email = job["pi_email"]
        sample_pairing += "\t".join([normal_sample_name, tumor_sample_name]) + "\n"
        tags = {
            "requestId": self.request_id,
            "sampleNameTumor": tumor_sample_name,
            "sampleNameNormal": normal_sample_name,
            "labHeadName": pi,
            "labHeadEmail": pi_email,
        }
        argos_jobs.append(RunCreator(app=pipeline, inputs=job, name=name, tags=tags))

    operator_run_summary = UploadAttachmentEvent(self.job_group_notifier_id,
                                                 "sample_pairing.txt",
                                                 sample_pairing).to_dict()
    send_notification.delay(operator_run_summary)
    mapping_file_event = UploadAttachmentEvent(self.job_group_notifier_id,
                                               "sample_mapping.txt",
                                               sample_mapping).to_dict()
    send_notification.delay(mapping_file_event)
    data_clinical = generate_sample_data_content(
        files,
        pipeline_name=pipeline_obj.name,
        pipeline_github=pipeline_obj.github,
        pipeline_version=pipeline_obj.version,
    )
    sample_data_clinical_event = UploadAttachmentEvent(self.job_group_notifier_id,
                                                       "sample_data_clinical.txt",
                                                       data_clinical).to_dict()
    send_notification.delay(sample_data_clinical_event)
    self.evaluate_sample_errors(error_samples)
    self.summarize_pairing_info(argos_inputs)
    return argos_jobs
def get_jobs(self):
    argos_jobs = list()
    if self.request_id:
        files = FileRepository.filter(queryset=self.files,
                                      metadata={'requestId': self.request_id,
                                                'igocomplete': True},
                                      filter_redact=True)
        cnt_tumors = FileRepository.filter(queryset=self.files,
                                           metadata={'requestId': self.request_id,
                                                     'tumorOrNormal': 'Tumor',
                                                     'igocomplete': True},
                                           filter_redact=True).count()
    elif self.pairing:
        files, cnt_tumors = self.get_files_for_pairs()
    if cnt_tumors == 0:
        cant_do = CantDoEvent(self.job_group_notifier_id).to_dict()
        send_notification.delay(cant_do)
        all_normals_event = SetLabelEvent(self.job_group_notifier_id, 'all_normals').to_dict()
        send_notification.delay(all_normals_event)
        return argos_jobs

    data = list()
    for f in files:
        sample = dict()
        sample['id'] = f.file.id
        sample['path'] = f.file.path
        sample['file_name'] = f.file.file_name
        sample['metadata'] = f.metadata
        data.append(sample)

    files = list()
    samples = list()
    # group by igoId
    igo_id_group = dict()
    for sample in data:
        igo_id = sample['metadata']['sampleId']
        if igo_id not in igo_id_group:
            igo_id_group[igo_id] = list()
        igo_id_group[igo_id].append(sample)
    for igo_id in igo_id_group:
        samples.append(build_sample(igo_id_group[igo_id]))

    argos_inputs, error_samples = construct_argos_jobs(samples, self.pairing)
    number_of_inputs = len(argos_inputs)
    sample_pairing = ""
    sample_mapping = ""
    pipeline = self.get_pipeline_id()
    try:
        pipeline_obj = Pipeline.objects.get(id=pipeline)
    except Pipeline.DoesNotExist:
        pass

    check_for_duplicates = list()
    for i, job in enumerate(argos_inputs):
        tumor_sample_name = job['pair'][0]['ID']
        normal_sample_name = job['pair'][1]['ID']
        # collect a mapping line for every fastq/bam referenced by the pair,
        # skipping lines and paths already seen
        for sample_name, pair_sample, keys in (
            (tumor_sample_name, job['pair'][0], ('R1', 'R2', 'zR1', 'zR2')),
            (normal_sample_name, job['pair'][1], ('R1', 'R2', 'zR1', 'zR2', 'bam')),
        ):
            for key in keys:
                for p in pair_sample[key]:
                    filepath = FileProcessor.parse_path_from_uri(p['location'])
                    file_str = "\t".join([sample_name, filepath]) + "\n"
                    if file_str not in check_for_duplicates:
                        check_for_duplicates.append(file_str)
                        sample_mapping += file_str
                    if filepath not in files:
                        files.append(filepath)
        name = "ARGOS %s, %i of %i" % (self.request_id, i + 1, number_of_inputs)
        assay = job['assay']
        pi = job['pi']
        pi_email = job['pi_email']
        sample_pairing += "\t".join([normal_sample_name, tumor_sample_name]) + "\n"
        argos_jobs.append((APIRunCreateSerializer(data={
            'app': pipeline,
            'inputs': argos_inputs,
            'name': name,
            'tags': {
                'requestId': self.request_id,
                'sampleNameTumor': tumor_sample_name,
                'sampleNameNormal': normal_sample_name,
                'labHeadName': pi,
                'labHeadEmail': pi_email
            }
        }), job))

    operator_run_summary = UploadAttachmentEvent(self.job_group_notifier_id,
                                                 'sample_pairing.txt',
                                                 sample_pairing).to_dict()
    send_notification.delay(operator_run_summary)
    mapping_file_event = UploadAttachmentEvent(self.job_group_notifier_id,
                                               'sample_mapping.txt',
                                               sample_mapping).to_dict()
    send_notification.delay(mapping_file_event)
    data_clinical = generate_sample_data_content(files,
                                                 pipeline_name=pipeline_obj.name,
                                                 pipeline_github=pipeline_obj.github,
                                                 pipeline_version=pipeline_obj.version)
    sample_data_clinical_event = UploadAttachmentEvent(self.job_group_notifier_id,
                                                       'sample_data_clinical.txt',
                                                       data_clinical).to_dict()
    send_notification.delay(sample_data_clinical_event)
    self.evaluate_sample_errors(error_samples)
    self.summarize_pairing_info(argos_inputs)
    return argos_jobs
def generate_sample_pairing_and_mapping_files(run_ids):
    sample_pairing = ""
    sample_mapping = ""
    data_clinical = ""
    runs = Run.objects.filter(id__in=run_ids)
    request_id_set = set()
    files = list()
    if runs:
        pipeline = runs[0].app
    for r in runs:
        request_id_set.add(r.tags['requestId'])
        inp_port = Port.objects.filter(run_id=r.id, name='pair').first()
        tumor_sample_name = inp_port.db_value[0]['ID']
        normal_sample_name = inp_port.db_value[1]['ID']
        for sample_name, sample, keys in (
            (tumor_sample_name, inp_port.db_value[0], ('R1', 'R2', 'zR1', 'zR2')),
            (normal_sample_name, inp_port.db_value[1], ('R1', 'R2', 'zR1', 'zR2', 'bam')),
        ):
            for key in keys:
                for p in sample[key]:
                    path = FileProcessor.get_file_path(p['location'])
                    sample_mapping += "\t".join([sample_name, path]) + "\n"
                    files.append(path)
        sample_pairing += "\t".join([normal_sample_name, tumor_sample_name]) + "\n"
    if runs:
        data_clinical = generate_sample_data_content(files,
                                                     pipeline_name=pipeline.name,
                                                     pipeline_github=pipeline.github,
                                                     pipeline_version=pipeline.version)
    return sample_mapping, sample_pairing, data_clinical
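# Illustrative only: the content generate_sample_pairing_and_mapping_files builds.
# Sample names and paths are hypothetical. sample_mapping holds one tab-separated
# "<sample name>\t<fastq/bam path>" line per file; sample_pairing holds one
# "<normal sample>\t<tumor sample>" line per run.
example_sample_mapping = (
    "s_C_000001_T001_d\t/work/fastq/s_C_000001_T001_d_R1.fastq.gz\n"
    "s_C_000001_T001_d\t/work/fastq/s_C_000001_T001_d_R2.fastq.gz\n"
    "s_C_000001_N001_d\t/work/fastq/s_C_000001_N001_d_R1.fastq.gz\n"
    "s_C_000001_N001_d\t/work/fastq/s_C_000001_N001_d_R2.fastq.gz\n"
)
example_sample_pairing = "s_C_000001_N001_d\ts_C_000001_T001_d\n"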
def test_create_file_type_unknown(self):
    file_obj = FileProcessor.create_file_obj(
        'file:///path/to/file.unknown_data_type',
        123345,
        'sha1$calculated checksum',
        str(self.file_group.id),
        {}
    )
    self.assertEqual(file_obj.file_type, self.file_type_unknown)
def test_create_file_setting_proper_file_type_based_on_extension(self):
    file_obj = FileProcessor.create_file_obj(
        'file:///path/to/file.fastq.gz',
        123345,
        'sha1$calculated checksum',
        str(self.file_group.id),
        {}
    )
    self.assertEqual(file_obj.file_type, self.file_type_fastq)