Example #1
def generate_sample_pairing_and_mapping_files(run_ids):
    """Build the sample pairing, sample mapping, and data clinical file
    contents for the given run ids."""
    sample_pairing = ""
    sample_mapping = ""
    data_clinical = ""  # stays empty if run_ids matches no runs

    runs = Run.objects.filter(id__in=run_ids)

    request_id_set = set()

    files = list()

    if runs:
        # Assume every run in the batch was launched with the same pipeline.
        pipeline = runs[0].app

    for r in runs:
        request_id_set.add(r.tags["requestId"])
        inp_port = Port.objects.filter(run_id=r.id, name="pair").first()
        tumor_sample_name = inp_port.db_value[0]["ID"]
        normal_sample_name = inp_port.db_value[1]["ID"]
        # Write one "<sample>\t<filepath>" row per FASTQ; the normal sample
        # (index 1) additionally contributes its BAMs.
        for sample, keys in (
            (inp_port.db_value[0], ("R1", "R2", "zR1", "zR2")),
            (inp_port.db_value[1], ("R1", "R2", "zR1", "zR2", "bam")),
        ):
            for key in keys:
                for p in sample[key]:
                    file_path = FileProcessor.get_file_path(p["location"])
                    sample_mapping += "\t".join([sample["ID"], file_path]) + "\n"
                    files.append(file_path)

        sample_pairing += "\t".join([normal_sample_name, tumor_sample_name]) + "\n"

    if runs:
        data_clinical = generate_sample_data_content(
            files, pipeline_name=pipeline.name, pipeline_github=pipeline.github, pipeline_version=pipeline.version
        )

    return sample_mapping, sample_pairing, data_clinical
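# Illustration (not from the source): the pairing and mapping outputs are
# plain tab-separated text built by string concatenation; the sample names
# and paths below are hypothetical.
demo_pairing = "\t".join(["s_C_000001_N001_d", "s_C_000001_X001_d"]) + "\n"
demo_mapping = "\t".join(["s_C_000001_X001_d", "/fastq/tumor_R1.fastq.gz"]) + "\n"
assert demo_pairing == "s_C_000001_N001_d\ts_C_000001_X001_d\n"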
Example #2
def create_data_clinical_file(run_id_list):
    """Collect the files and pipeline metadata across the given runs and
    build the data clinical file as a CWL File literal."""
    files = list()
    pipeline_names = set()
    pipeline_githubs = set()
    pipeline_versions = set()
    for run_id in run_id_list:
        argos_run = Run.objects.get(id=run_id)
        pipeline = argos_run.app
        # Sets keep one copy of each distinct pipeline name/repo/version.
        pipeline_names.add(pipeline.name)
        pipeline_githubs.add(pipeline.github)
        pipeline_versions.add(pipeline.version)
        files.extend(get_files_from_run(argos_run))
    data_clinical_content = generate_sample_data_content(
        files,
        pipeline_name=",".join(pipeline_names),
        pipeline_github=",".join(pipeline_githubs),
        pipeline_version=",".join(pipeline_versions),
    )
    data_clinical_content = data_clinical_content.strip()
    return {"class": "File", "basename": "sample_data_clinical.txt", "contents": data_clinical_content}
Example #3
    def get_jobs(self):
        files = FileRepository.filter(queryset=self.files,
                                      metadata={
                                          "requestId": self.request_id,
                                          "igocomplete": True
                                      })
        argos_jobs = list()

        cnt_tumors = FileRepository.filter(queryset=self.files,
                                           metadata={
                                               "requestId": self.request_id,
                                               "tumorOrNormal": "Tumor",
                                               "igocomplete": True
                                           }).count()
        if cnt_tumors == 0:
            cant_do = CantDoEvent(self.job_group_notifier_id).to_dict()
            send_notification.delay(cant_do)
            all_normals_event = SetLabelEvent(self.job_group_notifier_id,
                                              "all_normals").to_dict()
            send_notification.delay(all_normals_event)
            return argos_jobs

        data = list()
        for f in files:
            sample = dict()
            sample["id"] = f.file.id
            sample["path"] = f.file.path
            sample["file_name"] = f.file.file_name
            sample["metadata"] = f.metadata
            data.append(sample)

        files = list()
        samples = list()
        # group by igoId
        igo_id_group = dict()
        for sample in data:
            igo_id = sample["metadata"]["sampleId"]
            if igo_id not in igo_id_group:
                igo_id_group[igo_id] = list()
            igo_id_group[igo_id].append(sample)

        for igo_id in igo_id_group:
            samples.append(build_sample(igo_id_group[igo_id]))

        argos_inputs, error_samples = construct_argos_jobs(samples)
        number_of_inputs = len(argos_inputs)

        sample_pairing = ""
        sample_mapping = ""
        pipeline = self.get_pipeline_id()

        try:
            pipeline_obj = Pipeline.objects.get(id=pipeline)
        except Pipeline.DoesNotExist:
            # pipeline_obj is needed below to build the data clinical file;
            # re-raise instead of deferring the failure to a NameError.
            raise

        for i, job in enumerate(argos_inputs):
            tumor_sample_name = job["pair"][0]["ID"]
            normal_sample_name = job["pair"][1]["ID"]
            # Write one "<sample>\t<filepath>" row per FASTQ (plus the normal
            # BAMs), recording each unique file path only once.
            for sample, keys in (
                (job["pair"][0], ("R1", "R2", "zR1", "zR2")),
                (job["pair"][1], ("R1", "R2", "zR1", "zR2", "bam")),
            ):
                sample_name = sample["ID"]
                for key in keys:
                    for p in sample[key]:
                        filepath = FileProcessor.parse_path_from_uri(p["location"])
                        if filepath not in files:
                            sample_mapping += "\t".join([sample_name, filepath]) + "\n"
                            files.append(filepath)

            name = "ARGOS %s, %i of %i" % (self.request_id, i + 1,
                                           number_of_inputs)
            assay = job["assay"]
            pi = job["pi"]
            pi_email = job["pi_email"]

            sample_pairing += "\t".join(
                [normal_sample_name, tumor_sample_name]) + "\n"

            tags = {
                "requestId": self.request_id,
                "sampleNameTumor": tumor_sample_name,
                "sampleNameNormal": normal_sample_name,
                "labHeadName": pi,
                "labHeadEmail": pi_email,
            }
            argos_jobs.append(
                RunCreator(app=pipeline, inputs=job, name=name, tags=tags))

        operator_run_summary = UploadAttachmentEvent(
            self.job_group_notifier_id, "sample_pairing.txt",
            sample_pairing).to_dict()
        send_notification.delay(operator_run_summary)

        mapping_file_event = UploadAttachmentEvent(self.job_group_notifier_id,
                                                   "sample_mapping.txt",
                                                   sample_mapping).to_dict()
        send_notification.delay(mapping_file_event)

        data_clinical = generate_sample_data_content(
            files,
            pipeline_name=pipeline_obj.name,
            pipeline_github=pipeline_obj.github,
            pipeline_version=pipeline_obj.version,
        )
        sample_data_clinical_event = UploadAttachmentEvent(
            self.job_group_notifier_id, "sample_data_clinical.txt",
            data_clinical).to_dict()
        send_notification.delay(sample_data_clinical_event)

        self.evaluate_sample_errors(error_samples)
        self.summarize_pairing_info(argos_inputs)

        return argos_jobs
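# Illustration (not from the source): shape of the per-run name and tags that
# get_jobs attaches to each RunCreator; the request id and sample names below
# are hypothetical.
demo_name = "ARGOS %s, %i of %i" % ("10001_B", 1, 12)
demo_tags = {
    "requestId": "10001_B",
    "sampleNameTumor": "s_C_000001_X001_d",
    "sampleNameNormal": "s_C_000001_N001_d",
    "labHeadName": "PI Name",
    "labHeadEmail": "pi@example.org",
}
assert demo_name == "ARGOS 10001_B, 1 of 12"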
Example #4
    def get_jobs(self):

        argos_jobs = list()

        if self.request_id:
            files = FileRepository.filter(queryset=self.files,
                                          metadata={
                                              'requestId': self.request_id,
                                              'igocomplete': True
                                          },
                                          filter_redact=True)

            cnt_tumors = FileRepository.filter(queryset=self.files,
                                               metadata={
                                                   'requestId':
                                                   self.request_id,
                                                   'tumorOrNormal': 'Tumor',
                                                   'igocomplete': True
                                               },
                                               filter_redact=True).count()
        elif self.pairing:
            files, cnt_tumors = self.get_files_for_pairs()
        else:
            # Neither a request id nor an explicit pairing was supplied;
            # there is nothing to map and nothing to run.
            return argos_jobs

        if cnt_tumors == 0:
            cant_do = CantDoEvent(self.job_group_notifier_id).to_dict()
            send_notification.delay(cant_do)
            all_normals_event = SetLabelEvent(self.job_group_notifier_id,
                                              'all_normals').to_dict()
            send_notification.delay(all_normals_event)
            return argos_jobs

        data = list()
        for f in files:
            sample = dict()
            sample['id'] = f.file.id
            sample['path'] = f.file.path
            sample['file_name'] = f.file.file_name
            sample['metadata'] = f.metadata
            data.append(sample)

        files = list()
        samples = list()
        # group by igoId
        igo_id_group = dict()
        for sample in data:
            igo_id = sample['metadata']['sampleId']
            if igo_id not in igo_id_group:
                igo_id_group[igo_id] = list()
            igo_id_group[igo_id].append(sample)

        for igo_id in igo_id_group:
            samples.append(build_sample(igo_id_group[igo_id]))

        argos_inputs, error_samples = construct_argos_jobs(
            samples, self.pairing)
        number_of_inputs = len(argos_inputs)

        sample_pairing = ""
        sample_mapping = ""
        pipeline = self.get_pipeline_id()

        try:
            pipeline_obj = Pipeline.objects.get(id=pipeline)
        except Pipeline.DoesNotExist:
            # pipeline_obj is needed below to build the data clinical file;
            # re-raise instead of deferring the failure to a NameError.
            raise

        check_for_duplicates = list()
        for i, job in enumerate(argos_inputs):
            tumor_sample_name = job['pair'][0]['ID']
            normal_sample_name = job['pair'][1]['ID']
            # Write one "<sample>\t<filepath>" row per unique mapping line and
            # record each unique file path for the data clinical file.
            for sample, keys in (
                (job['pair'][0], ('R1', 'R2', 'zR1', 'zR2')),
                (job['pair'][1], ('R1', 'R2', 'zR1', 'zR2', 'bam')),
            ):
                sample_name = sample['ID']
                for key in keys:
                    for p in sample[key]:
                        filepath = FileProcessor.parse_path_from_uri(p['location'])
                        file_str = "\t".join([sample_name, filepath]) + "\n"
                        if file_str not in check_for_duplicates:
                            check_for_duplicates.append(file_str)
                            sample_mapping += file_str
                        if filepath not in files:
                            files.append(filepath)

            name = "ARGOS %s, %i of %i" % (self.request_id, i + 1,
                                           number_of_inputs)
            assay = job['assay']
            pi = job['pi']
            pi_email = job['pi_email']

            sample_pairing += "\t".join(
                [normal_sample_name, tumor_sample_name]) + "\n"

            argos_jobs.append((APIRunCreateSerializer(
                data={
                    'app': pipeline,
                    'inputs': job,  # this job's input document, not the full list
                    'name': name,
                    'tags': {
                        'requestId': self.request_id,
                        'sampleNameTumor': tumor_sample_name,
                        'sampleNameNormal': normal_sample_name,
                        'labHeadName': pi,
                        'labHeadEmail': pi_email
                    }
                }), job))

        operator_run_summary = UploadAttachmentEvent(
            self.job_group_notifier_id, 'sample_pairing.txt',
            sample_pairing).to_dict()
        send_notification.delay(operator_run_summary)

        mapping_file_event = UploadAttachmentEvent(self.job_group_notifier_id,
                                                   'sample_mapping.txt',
                                                   sample_mapping).to_dict()
        send_notification.delay(mapping_file_event)

        data_clinical = generate_sample_data_content(
            files,
            pipeline_name=pipeline_obj.name,
            pipeline_github=pipeline_obj.github,
            pipeline_version=pipeline_obj.version)
        sample_data_clinical_event = UploadAttachmentEvent(
            self.job_group_notifier_id, 'sample_data_clinical.txt',
            data_clinical).to_dict()
        send_notification.delay(sample_data_clinical_event)

        self.evaluate_sample_errors(error_samples)
        self.summarize_pairing_info(argos_inputs)

        return argos_jobs
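# Design note (not from the source): check_for_duplicates and files are
# lists, so every membership test is a linear scan. A set gives the same
# de-duplication with O(1) lookups while a companion list preserves the
# emission order of the mapping rows:
seen = set()
ordered = []
for path in ["/fastq/a_R1.fastq.gz", "/fastq/a_R1.fastq.gz", "/fastq/b_R2.fastq.gz"]:
    if path not in seen:
        seen.add(path)
        ordered.append(path)
assert ordered == ["/fastq/a_R1.fastq.gz", "/fastq/b_R2.fastq.gz"]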