Пример #1
0
 def test_compile_pairs0(self):
     """
     Test the results of pairing with no tumor or normal samples; should give empty output
     """
     samples = []
     pairs = compile_pairs(samples)
     expected_pairs = {'tumor': [], 'normal': []}
     self.assertTrue(pairs == expected_pairs)
Пример #2
0
 def test_compile_pairs3(self):
     """
     Test pairing with only a single Normal sample
     """
     samples = [{
         "bait_set": "IMPACT468_BAITS",
         "patient_id": "C-DRKHP7",
         "run_id": ["JAX_0397"],
         "preservation_type": ["Frozen"],
         "tumor_type": "Normal"
     }]
     pairs = compile_pairs(samples)
     expected_pairs = {'tumor': [], 'normal': []}
     self.assertTrue(pairs == expected_pairs)
Пример #3
0
 def test_compile_pairs1(self):
     """
     Test pairing with a single pair of samples
     """
     samples = [{
         "patient_id": "C-W86LMR",
         "bait_set": "IMPACT468_BAITS",
         "run_id": ["JAX_0397"],
         "preservation_type": ["Frozen"],
         "tumor_type": "Normal",
         "sample_id": "my_sample_id2",
         "SM": "my_sample_id2"
     }, {
         "patient_id": "C-W86LMR",
         "bait_set": "IMPACT468_BAITS",
         "run_id": ["JAX_0397"],
         "preservation_type": ["Frozen"],
         "tumor_type": "Tumor",
         "SM": "my_sample_id1",
         "sample_id": "my_sample_id1"
     }]
     pairs = compile_pairs(samples)
     expected_pairs = {
         'tumor': [{
             'patient_id': 'C-W86LMR',
             'bait_set': 'IMPACT468_BAITS',
             'tumor_type': 'Tumor',
             "run_id": ["JAX_0397"],
             "preservation_type": ["Frozen"],
             "sample_id": "my_sample_id1",
             "SM": "my_sample_id1"
         }],
         'normal': [{
             'patient_id': 'C-W86LMR',
             'bait_set': 'IMPACT468_BAITS',
             'tumor_type': 'Normal',
             "run_id": ["JAX_0397"],
             "preservation_type": ["Frozen"],
             "sample_id": "my_sample_id2",
             "SM": "my_sample_id2"
         }]
     }
     self.assertTrue(pairs == expected_pairs)
Пример #4
0
    def test_compile_pairs_pooled_normal_and_dmp_bam(self):
        """
        Test that a DMP bam can be found for a tumor sample without any normal in the current request or other requests

        Start with no Normals and add them in reverse order of the Pairing priority (as per docstring header in this document)
        and make sure each added Normal results in the correct pairing
        """
        lims_filegroup_instance = FileGroup.objects.get(name="LIMS")
        poolednormal_filegroup_instance = FileGroup.objects.get(
            name="Pooled Normal")
        fastq_filetype_instance = FileType.objects.get(name="fastq")
        dmp_bam_filegroup_instance = FileGroup.objects.get(name="DMP BAMs")
        bam_filetype_instance = FileType.objects.get(name="bam")

        # generate tumor samples
        # Sample 1 C-8VK0V7
        tumor1_R1_file_instance = File.objects.create(
            file_type=fastq_filetype_instance,
            file_group=lims_filegroup_instance,
            file_name="C-8VK0V7.R1.fastq",
            path="/C-8VK0V7.R1.fastq")
        tumor1_R1_filemetadata_instance = FileMetadata.objects.create(
            file=tumor1_R1_file_instance,
            metadata={
                "R": "R1",
                "sex": "F",
                "runId": "PITT_0439",
                "recipe": "IMPACT468",
                "baitSet": "IMPACT468_BAITS",
                "species": "Human",
                "sampleId": "10075_D_3_5",
                "libraryId": None,
                "flowCellId": "HFTCNBBXY",
                "barcodeId": "DUAL_IDT_LIB_267",
                "barcodeIndex": "GTATTGGC-TTGTCGGT",
                "runDate": "2019-12-17",
                "patientId": "C-8VK0V7",
                "requestId": "10075_D_3",
                "sequencingCenter": "MSKCC",
                "platform": "Illumina",
                "sampleName": "C-8VK0V7-R001-d",
                "igocomplete": True,
                "oncoTreeCode": "MEL",
                "preservation": "Frozen",
                "sampleOrigin": "Tissue",
                "specimenType": "Resection",
                "tumorOrNormal": "Tumor",
                "cmoSampleClass": "Local Recurrence",
                "externalSampleId": "SK_MEL_1091A_T",
                "investigatorSampleId": "SK_MEL_1091A_T",
                "labHeadEmail": "",
                "labHeadName": ""
            })
        tumor1_R2_file_instance = File.objects.create(
            file_type=fastq_filetype_instance,
            file_group=lims_filegroup_instance,
            file_name="C-8VK0V7.R2.fastq",
            path="/C-8VK0V7.R2.fastq")
        tumor1_R2_filemetadata_instance = FileMetadata.objects.create(
            file=tumor1_R2_file_instance,
            metadata={
                "R": "R2",
                "sex": "F",
                "runId": "PITT_0439",
                "recipe": "IMPACT468",
                "baitSet": "IMPACT468_BAITS",
                "species": "Human",
                "sampleId": "10075_D_3_5",
                "libraryId": None,
                "flowCellId": "HFTCNBBXY",
                "barcodeId": "DUAL_IDT_LIB_267",
                "barcodeIndex": "GTATTGGC-TTGTCGGT",
                "runDate": "2019-12-17",
                "patientId": "C-8VK0V7",
                "requestId": "10075_D_3",
                "sequencingCenter": "MSKCC",
                "platform": "Illumina",
                "sampleName": "C-8VK0V7-R001-d",
                "igocomplete": True,
                "oncoTreeCode": "MEL",
                "preservation": "Frozen",
                "sampleOrigin": "Tissue",
                "specimenType": "Resection",
                "tumorOrNormal": "Tumor",
                "cmoSampleClass": "Local Recurrence",
                "externalSampleId": "SK_MEL_1091A_T",
                "investigatorSampleId": "SK_MEL_1091A_T",
                "labHeadEmail": "",
                "labHeadName": ""
            })

        # Sample 2 C-ABCDEF ; dummy sample
        tumor2_R1_file_instance = File.objects.create(
            file_type=fastq_filetype_instance,
            file_group=lims_filegroup_instance,
            file_name="C-ABCDEF.R1.fastq",
            path="/C-ABCDEF.R1.fastq")
        tumor2_R1_filemetadata_instance = FileMetadata.objects.create(
            file=tumor2_R1_file_instance,
            metadata={
                "R": "R1",
                "sex": "F",
                "runId": "PITT_0439",
                "recipe": "IMPACT468",
                "baitSet": "IMPACT468_BAITS",
                "species": "Human",
                "sampleId": "Sample12345",
                "libraryId": None,
                "flowCellId": "HFTCNBBXY",
                "barcodeId": "DUAL_IDT_LIB_267",
                "barcodeIndex": "GTATTGGC-TTGTCGGT",
                "runDate": "2019-12-17",
                "patientId": "C-ABCDEF",
                "requestId": "10075_D_3",
                "sequencingCenter": "MSKCC",
                "platform": "Illumina",
                "sampleName": "C-ABCDEF-R001-d",
                "igocomplete": True,
                "oncoTreeCode": "MEL",
                "preservation": "Frozen",
                "sampleOrigin": "Tissue",
                "specimenType": "Resection",
                "tumorOrNormal": "Tumor",
                "cmoSampleClass": "Local Recurrence",
                "externalSampleId": "SK_MEL_1234_T",
                "investigatorSampleId": "SK_MEL_1234_T",
                "labHeadEmail": "",
                "labHeadName": ""
            })
        tumor2_R2_file_instance = File.objects.create(
            file_type=fastq_filetype_instance,
            file_group=lims_filegroup_instance,
            file_name="C-ABCDEF.R2.fastq",
            path="/C-ABCDEF.R2.fastq")
        tumor2_R2_filemetadata_instance = FileMetadata.objects.create(
            file=tumor2_R2_file_instance,
            metadata={
                "R": "R2",
                "sex": "F",
                "runId": "PITT_0439",
                "recipe": "IMPACT468",
                "baitSet": "IMPACT468_BAITS",
                "species": "Human",
                "sampleId": "Sample12345",
                "libraryId": None,
                "flowCellId": "HFTCNBBXY",
                "barcodeId": "DUAL_IDT_LIB_267",
                "barcodeIndex": "GTATTGGC-TTGTCGGT",
                "runDate": "2019-12-17",
                "patientId": "C-ABCDEF",
                "requestId": "10075_D_3",
                "sequencingCenter": "MSKCC",
                "platform": "Illumina",
                "sampleName": "C-ABCDEF-R001-d",
                "igocomplete": True,
                "oncoTreeCode": "MEL",
                "preservation": "Frozen",
                "sampleOrigin": "Tissue",
                "specimenType": "Resection",
                "tumorOrNormal": "Tumor",
                "cmoSampleClass": "Local Recurrence",
                "externalSampleId": "SK_MEL_1234_T",
                "investigatorSampleId": "SK_MEL_1234_T",
                "labHeadEmail": "",
                "labHeadName": ""
            })

        samples = [{
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-8VK0V7",
            "tumor_type": "Tumor",
            "sample_id": "10075_D_3_5",
            "SM": "10075_D_3_5",
            "request_id": "10075_D_3",
            'run_id': ['PITT_0439'],
            "preservation_type": ["Frozen"]
        }]

        # test that no pairs are found since there are no Normals loaded yet
        pairs = compile_pairs(samples)
        self.assertDictEqual(pairs, {'tumor': [], 'normal': []})

        # add Pooled Normal from another run
        poolednormal_R1_file_instance = File.objects.create(
            file_type=fastq_filetype_instance,
            file_group=poolednormal_filegroup_instance,
            file_name="FROZENPOOLEDNORMAL.R1.fastq",
            path="/FROZENPOOLEDNORMAL.R1.fastq")
        poolednormal_R1_filemetadata_instance = FileMetadata.objects.create(
            file=poolednormal_R1_file_instance,
            metadata={
                "runId": "PITT_0439",
                "recipe": "IMPACT468",
                'bait_set': 'IMPACT468_BAITS',
                "sequencingCenter": "MSKCC",
                "platform": "Illumina",
                "preservation": "Frozen"
            })
        poolednormal_R2_file_instance = File.objects.create(
            file_type=fastq_filetype_instance,
            file_group=poolednormal_filegroup_instance,
            file_name="FROZENPOOLEDNORMAL.R2.fastq",
            path="/FROZENPOOLEDNORMAL.R2.fastq")
        poolednormal_R2_filemetadata_instance = FileMetadata.objects.create(
            file=poolednormal_R2_file_instance,
            metadata={
                "runId": "PITT_0439",
                "recipe": "IMPACT468",
                'bait_set': 'IMPACT468_BAITS',
                "sequencingCenter": "MSKCC",
                "platform": "Illumina",
                "preservation": "Frozen"
            })

        pairs = compile_pairs(samples)
        # remove the R1_bid and R2_bid for testing because they are non-deterministic
        # TODO: mock this ^^
        pairs['normal'][0]['R1_bid'].pop()
        pairs['normal'][0]['R2_bid'].pop()

        expected_pairs = {
            'tumor': [{
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-8VK0V7',
                'tumor_type': 'Tumor',
                'sample_id': '10075_D_3_5',
                'SM': '10075_D_3_5',
                'request_id': '10075_D_3',
                'run_id': ['PITT_0439'],
                'preservation_type': ['Frozen']
            }],
            'normal': [{
                'CN':
                'MSKCC',
                'PL':
                'Illumina',
                'PU': ['PN_FCID_FROZENPOOLEDNORMAL'],
                'LB':
                'FROZENPOOLEDNORMAL_PITT_0439_1',
                'tumor_type':
                'Normal',
                'ID':
                ['FROZENPOOLEDNORMAL_PITT_0439_PN_FCID_FROZENPOOLEDNORMAL'],
                'SM':
                'FROZENPOOLEDNORMAL_PITT_0439',
                'species':
                '',
                'patient_id':
                'PN_PATIENT_ID',
                'bait_set':
                'IMPACT468',
                'sample_id':
                'FROZENPOOLEDNORMAL_PITT_0439',
                'run_date': [''],
                'specimen_type':
                '',
                'R1': ['/FROZENPOOLEDNORMAL.R1.fastq'],
                'R2': ['/FROZENPOOLEDNORMAL.R2.fastq'],
                'R1_bid': [],  # UUID('cf065d86-3096-47b3-9b6f-cf711a1d6e0f')
                'R2_bid': [],  # UUID('51232bdd-6b31-4a4d-80c4-3aef13965fcd')
                'bam': [],
                'bam_bid': [],
                'specimen_type':
                'Pooled Normal',
                'request_id':
                'FROZENPOOLEDNORMAL_PITT_0439',
                'pi':
                '',
                'pi_email':
                '',
                'run_id': [''],
                'preservation_type': [['Frozen']]
            }]
        }
        print(
            "Running test_compile_pairs_pooled_normal_and_dmp_bam (pn part): pairs ---\n",
            json.dumps(pairs, cls=UUIDEncoder))
        print(
            "Running test_compile_pairs_pooled_normal_and_dmp_bam (pn part): expected ---\n",
            json.dumps(expected_pairs, cls=UUIDEncoder))
        self.assertDictEqual(pairs, expected_pairs)

        # Add a DMP Bam for the tumor sample to the database
        dmp_bam_file_instance = File.objects.create(
            file_type=bam_filetype_instance,
            file_group=dmp_bam_filegroup_instance,
            file_name="C-8VK0V7.bam",
            path="/C-8VK0V7.bam")
        dmp_bam_filemetadata_instance = FileMetadata.objects.create(
            file=dmp_bam_file_instance,
            version=1,
            metadata={
                "bai": "/C-8VK0V7.bai",
                "bam": "/C-8VK0V7.bam",
                "type": "N",
                "assay": "IM6",
                "sample": "P-1234567-N01-IM6",
                "anon_id": "ABCDEF-N",
                "patient": {
                    "cmo": "8VK0V7",
                    "dmp": "P-1234567",
                    "updated": "2020-03-19T23:57:51.941963Z",
                    "imported": "2020-03-19T23:57:51.941945Z"
                },
                "updated": "2020-03-25T19:45:16.421154Z",
                "sequencingCenter": "MSKCC",
                "platform": "Illumina",
                "version": 1,
                "coverage": 648,
                "imported": "2020-03-25T19:45:16.421137Z",
                "cmo_assay": "IMPACT468",
                "tumor_type": "MBC",
                "external_id": "C-8VK0V7-N901-dZ-IM6",
                "sample_type": "0",
                "tissue_type": "Breast",
                "primary_site": "Breast",
                "project_name": "UK12344567890VB",
                "patient_group": "Group_12344567890",
                "part_c_consent": 1,
                "metastasis_site": "Not Applicable",
                "somatic_calling_status": "Matched",
                "major_allele_contamination": 0.452,
                "minor_allele_contamination": 0.00069
            })

        # test that the DMP bam gets chosen as the sample's matched normal now instead of the pooled normal
        pairs = compile_pairs(samples)

        expected_pairs = {
            'tumor': [{
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-8VK0V7',
                'tumor_type': 'Tumor',
                'sample_id': '10075_D_3_5',
                'SM': '10075_D_3_5',
                'request_id': '10075_D_3',
                'run_id': ['PITT_0439'],
                'preservation_type': ['Frozen']
            }],
            'normal': [{
                'CN': 'MSKCC',
                'PL': 'Illumina',
                'PU': ['DMP_FCID_DMP_BARCODEIDX'],
                'LB': 'C-8VK0V7-N901-dZ-IM6_1',
                'tumor_type': 'Normal',
                'ID': ['s_C_8VK0V7_N901_dZ_IM6_DMP_FCID_DMP_BARCODEIDX'],
                'SM': 's_C_8VK0V7_N901_dZ_IM6',
                'species': '',
                'patient_id': 'C-8VK0V7',
                'bait_set': 'IMPACT468_BAITS',
                'sample_id': 'C-8VK0V7-N901-dZ-IM6',
                'run_date': [''],
                'specimen_type': 'DMP Normal',
                'R1': [],
                'R2': [],
                'R1_bid': [],
                'R2_bid': [],
                'bam': ['/C-8VK0V7.bam'],
                'bam_bid': [],  # UUID('77b9a78f-1bed-475c-9799-c18a6f57b347')
                'request_id': 'C-8VK0V7-N901-dZ-IM6',
                'pi': '',
                'pi_email': '',
                'run_id': [''],
                'preservation_type': ['']
            }]
        }

        # remove the bam_bid for testing because it is non-deterministic
        # TODO: mock this ^^
        pairs['normal'][0]['bam_bid'].pop()

        print(
            "Running test_compile_pairs_pooled_normal_and_dmp_bam (dmp part): pairs ---\n",
            json.dumps(pairs, cls=UUIDEncoder))
        print(
            "Running test_compile_pairs_pooled_normal_and_dmp_bam (dmp part): expected ---\n",
            json.dumps(expected_pairs, cls=UUIDEncoder))
        self.assertDictEqual(pairs, expected_pairs)

        # Now add a matched normal to the original request for the sample
        # Sample 1 C-8VK0V7
        normal1_R1_file_instance = File.objects.create(
            file_type=fastq_filetype_instance,
            file_group=lims_filegroup_instance,
            file_name="C-8VK0V7-N.R1.fastq",
            path="/C-8VK0V7-N.R1.fastq")
        normal1_R1_filemetadata_instance = FileMetadata.objects.create(
            file=normal1_R1_file_instance,
            metadata={
                "R": "R1",
                "sex": "F",
                "runId": "PITT_0439",
                "recipe": "IMPACT468",
                "baitSet": "IMPACT468_BAITS",
                "species": "Human",
                "sampleId": "10075_D_2_3",
                "libraryId": None,
                "flowCellId": "HFTCNBBXY",
                "barcodeId": "DUAL_IDT_LIB_267",
                "barcodeIndex": "GTATTGGC-TTGTCGGT",
                "runDate": "2019-12-17",
                "patientId": "C-8VK0V7",
                "requestId": "10075_D_3",
                "sequencingCenter": "MSKCC",
                "platform": "Illumina",
                "sampleName": "C-8VK0V7-N001-d",
                "igocomplete": True,
                "oncoTreeCode": "MEL",
                "preservation": "Frozen",
                "sampleOrigin": "Tissue",
                "specimenType": "Resection",
                "tumorOrNormal": "Normal",
                "cmoSampleClass": "Normal",
                "externalSampleId": "SK_MEL_1091A_N",
                "investigatorSampleId": "SK_MEL_1091A_N",
                "labHeadEmail": "",
                "labHeadName": ""
            })
        normal1_R2_file_instance = File.objects.create(
            file_type=fastq_filetype_instance,
            file_group=lims_filegroup_instance,
            file_name="C-8VK0V7-N.R2.fastq",
            path="/C-8VK0V7-N.R2.fastq")
        normal1_R2_filemetadata_instance = FileMetadata.objects.create(
            file=normal1_R2_file_instance,
            metadata={
                "R": "R2",
                "sex": "F",
                "runId": "PITT_0439",
                "recipe": "IMPACT468",
                "baitSet": "IMPACT468_BAITS",
                "species": "Human",
                "sampleId": "10075_D_2_3",
                "libraryId": None,
                "flowCellId": "HFTCNBBXY",
                "barcodeId": "DUAL_IDT_LIB_267",
                "barcodeIndex": "GTATTGGC-TTGTCGGT",
                "runDate": "2019-12-17",
                "patientId": "C-8VK0V7",
                "requestId": "10075_D_3",
                "sequencingCenter": "MSKCC",
                "platform": "Illumina",
                "sampleName": "C-8VK0V7-N001-d",
                "igocomplete": True,
                "oncoTreeCode": "MEL",
                "preservation": "Frozen",
                "sampleOrigin": "Tissue",
                "specimenType": "Resection",
                "tumorOrNormal": "Normal",
                "cmoSampleClass": "Normal",
                "externalSampleId": "SK_MEL_1091A_N",
                "investigatorSampleId": "SK_MEL_1091A_N",
                "labHeadEmail": "",
                "labHeadName": ""
            })

        # test that the paired normal from the same request gets chosen instead for the pairing
        pairs = compile_pairs(samples)
        # remove the R1_bid and R2_bid for testing because they are non-deterministic
        # TODO: mock this ^^
        pairs['normal'][0]['R1_bid'].pop()
        pairs['normal'][0]['R2_bid'].pop()

        expected_pairs = {
            'tumor': [{
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-8VK0V7',
                'tumor_type': 'Tumor',
                'sample_id': '10075_D_3_5',
                'SM': '10075_D_3_5',
                'request_id': '10075_D_3',
                'run_id': ['PITT_0439'],
                'preservation_type': ['Frozen']
            }],
            'normal': [{
                'CN': 'MSKCC',
                'PL': 'Illumina',
                'PU': ['HFTCNBBXY_GTATTGGC-TTGTCGGT'],
                'LB': 's_C_8VK0V7_N001_d_1',
                'tumor_type': 'Normal',
                'ID': ['s_C_8VK0V7_N001_d_HFTCNBBXY_GTATTGGC-TTGTCGGT'],
                'SM': 's_C_8VK0V7_N001_d',
                'species': 'Human',
                'patient_id': 'C-8VK0V7',
                'bait_set': 'IMPACT468_BAITS',
                'sample_id': '10075_D_2_3',
                'run_date': ['2019-12-17'],
                'specimen_type': 'Resection',
                'R1': ['/C-8VK0V7-N.R1.fastq'],
                'R2': ['/C-8VK0V7-N.R2.fastq'],
                'R1_bid': [],  # UUID('bfb94fe6-bf46-431a-8f72-6ee47ee72fc9')
                'R2_bid': [],  # UUID('1e8b68f8-c333-499c-b22d-50fbef2d2ea7')
                'bam': [],
                'bam_bid': [],
                'request_id': '10075_D_3',
                'pi': '',
                'pi_email': '',
                'run_id': ['PITT_0439'],
                'preservation_type': ['Frozen']
            }]
        }

        print(
            "Running test_compile_pairs_pooled_normal_and_dmp_bam (third variation): pairs ---\n",
            json.dumps(pairs, cls=UUIDEncoder))
        print(
            "Running test_compile_pairs_pooled_normal_and_dmp_bam (third variation): expected ---\n",
            json.dumps(expected_pairs, cls=UUIDEncoder))
        self.assertDictEqual(pairs, expected_pairs)
Пример #5
0
    def test_get_most_recent_normal1(self):
        """
        Test that when retreiving a normal from other requests, the most recent Normal is returned
        in the event that a patient has several normals
        Return the Normal with the most recent run_date
        """
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D_2.file.json"),
                     verbosity=0)
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D_2.filemetadata.json"),
                     verbosity=0)
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D_4.file.json"),
                     verbosity=0)
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D_4.filemetadata.json"),
                     verbosity=0)

        # check the total number of db entries now
        self.assertTrue(len(File.objects.all()) == 4)
        self.assertTrue(len(FileMetadata.objects.all()) == 4)

        samples = [{
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-8VK0V7",
            "tumor_type": "Tumor",
            "sample_id": "10075_D_3_5",
            "SM": "10075_D_3_5",
            "request_id": "10075_D_3",
            'run_id': ['JAX_0397'],
            "preservation_type": ["EDTA-Streck"]
        }]

        pairs = compile_pairs(samples)
        expected_pairs = {
            'tumor': [{
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-8VK0V7',
                'tumor_type': 'Tumor',
                'sample_id': '10075_D_3_5',
                "SM": "10075_D_3_5",
                'request_id': '10075_D_3',
                'run_id': ['JAX_0397'],
                "preservation_type": ["EDTA-Streck"]
            }],
            'normal': [{
                'CN':
                'MSKCC',
                'PL':
                'Illumina',
                'PU': ['HCYYWBBXY'],
                'LB':
                '10075_D_4_3',
                'tumor_type':
                'Normal',
                'ID': ['s_C_8VK0V7_N001_d_HCYYWBBXY'],
                'SM':
                's_C_8VK0V7_N001_d',
                'species':
                'Human',
                'patient_id':
                'C-8VK0V7',
                'bait_set':
                'IMPACT468_BAITS',
                'sample_id':
                '10075_D_4_3',
                'run_date': ['2019-12-13'],
                'specimen_type':
                'Blood',
                'R1': [
                    '/ifs/archive/GCL/hiseq/FASTQ/JAX_0397_BHCYYWBBXY/Project_10075_D_4/Sample_JW_MEL_007_NORM_IGO_10075_D_4_3/JW_MEL_007_NORM_IGO_10075_D_4_3_S15_R1_001.fastq.gz'
                ],
                'R2': [
                    '/ifs/archive/GCL/hiseq/FASTQ/JAX_0397_BHCYYWBBXY/Project_10075_D_4/Sample_JW_MEL_007_NORM_IGO_10075_D_4_3/JW_MEL_007_NORM_IGO_10075_D_4_3_S15_R2_001.fastq.gz'
                ],
                'R1_bid': [UUID('08072445-84ff-4b43-855d-d8d2dc87e2d5')],
                'R2_bid': [UUID('f0d9a1e1-9414-42df-a749-08776732ee04')],
                'bam': [],
                'bam_bid': [],
                'request_id':
                '10075_D_4',
                'run_id': ['JAX_0397'],
                "preservation_type": ["EDTA-Streck"],
                'pi':
                'John Smith',
                'pi_email':
                '*****@*****.**'
            }]
        }

        print("Running get_most_recent_normal1 ---")
        print(json.dumps(pairs, cls=UUIDEncoder))
        print(json.dumps(expected_pairs, cls=UUIDEncoder))
        self.assertTrue(pairs == expected_pairs)
Пример #6
0
    def test_get_pair_from_other_request(self):
        """
        Test that you can get the correct Normal sample for a patient when the
        Normal sample is part of another request
        """
        # Load fixtures
        # only normals
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D_2.file.json"),
                     verbosity=0)
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D_2.filemetadata.json"),
                     verbosity=0)
        # only tumors
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D_3.file.json"),
                     verbosity=0)
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D_3.filemetadata.json"),
                     verbosity=0)

        # check the total number of db entries now
        self.assertTrue(len(File.objects.all()) == 4)
        self.assertTrue(len(FileMetadata.objects.all()) == 4)

        samples = [{
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-8VK0V7",
            "tumor_type": "Tumor",
            'run_id': ['JAX_0397'],
            "preservation_type": ["EDTA-Streck"],
            "sample_id": "10075_D_3_5",
            "SM": "10075_D_3_5",
            "request_id": "10075_D_3"
        }]

        pairs = compile_pairs(samples)
        expected_pairs = {
            'tumor': [{
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-8VK0V7',
                'run_id': ['JAX_0397'],
                "preservation_type": ["EDTA-Streck"],
                'tumor_type': 'Tumor',
                'sample_id': '10075_D_3_5',
                "SM": "10075_D_3_5",
                'request_id': '10075_D_3'
            }],
            'normal': [{
                'CN':
                'MSKCC',
                'PL':
                'Illumina',
                'PU': ['HCYYWBBXY'],
                'LB':
                '10075_D_2_3',
                'tumor_type':
                'Normal',
                'ID': ['s_C_8VK0V7_N001_d_HCYYWBBXY'],
                'SM':
                's_C_8VK0V7_N001_d',
                'species':
                'Human',
                'patient_id':
                'C-8VK0V7',
                'bait_set':
                'IMPACT468_BAITS',
                'sample_id':
                '10075_D_2_3',
                'run_date': ['2019-12-12'],
                'specimen_type':
                'Blood',
                'R1': [
                    '/ifs/archive/GCL/hiseq/FASTQ/JAX_0397_BHCYYWBBXY/Project_10075_D_2/Sample_JW_MEL_007_NORM_IGO_10075_D_2_3/JW_MEL_007_NORM_IGO_10075_D_2_3_S15_R1_001.fastq.gz'
                ],
                'R2': [
                    '/ifs/archive/GCL/hiseq/FASTQ/JAX_0397_BHCYYWBBXY/Project_10075_D_2/Sample_JW_MEL_007_NORM_IGO_10075_D_2_3/JW_MEL_007_NORM_IGO_10075_D_2_3_S15_R2_001.fastq.gz'
                ],
                'R1_bid': [UUID('a46c5e6b-0793-4cd2-b5dd-92b3d71cf1ac')],
                'R2_bid': [UUID('c71c259a-ebc0-4490-9af1-bc99387a70d7')],
                'bam': [],
                'bam_bid': [],
                'request_id':
                '10075_D_2',
                'run_id': ['JAX_0397'],
                "preservation_type": ["EDTA-Streck"],
                'pi':
                'John Smith',
                'pi_email':
                '*****@*****.**'
            }]
        }

        print("Running get_pair_from_other_request ---")
        print(json.dumps(pairs, cls=UUIDEncoder))
        print(json.dumps(expected_pairs, cls=UUIDEncoder))
        self.assertTrue(pairs == expected_pairs)
Пример #7
0
    def test_compile_pairs5(self):
        """
        Test pairing with a single unpaired tumor sample
        Test that the correct Normal sample is found from within the same request
        This time also load File entries from another request to make sure they do not confound the pairing
        """
        # Load fixtures
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D.file.json"),
                     verbosity=0)
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "10075_D.filemetadata.json"),
                     verbosity=0)
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "05257_CB.file.json"),
                     verbosity=0)
        call_command('loaddata',
                     os.path.join(settings.TEST_FIXTURE_DIR,
                                  "05257_CB.filemetadata.json"),
                     verbosity=0)

        # check the total number of db entries now
        self.assertTrue(len(File.objects.all()) == 14)
        self.assertTrue(len(FileMetadata.objects.all()) == 18)

        samples = [{
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-DRKHP7",
            "tumor_type": "Tumor",
            'run_id': ['JAX_0397'],
            "preservation_type": ["Frozen"],
            "SM": "10075_D_1",
            "sample_id": "10075_D_1"
        }]
        pairs = compile_pairs(samples)
        expected_pairs = {
            'normal': [{
                'CN':
                'MSKCC',
                'ID': ['s_C_DRKHP7_N001_d_HCYYWBBXY'],
                'LB':
                '10075_D_2',
                'PL':
                'Illumina',
                'PU': ['HCYYWBBXY'],
                'R1': [
                    '/ifs/archive/GCL/hiseq/FASTQ/JAX_0397_BHCYYWBBXY/Project_10075_D/Sample_31-N_IGO_10075_D_2/31-N_IGO_10075_D_2_S14_R1_001.fastq.gz'
                ],
                'R1_bid': [UUID('40a07e9a-2198-40b7-9f7f-7696c9d6429e')],
                'R2': [
                    '/ifs/archive/GCL/hiseq/FASTQ/JAX_0397_BHCYYWBBXY/Project_10075_D/Sample_31-N_IGO_10075_D_2/31-N_IGO_10075_D_2_S14_R2_001.fastq.gz'
                ],
                'R2_bid': [UUID('bb7ff922-b741-4df7-ba2a-4f3b8549e8b5')],
                'bam': [],
                'bam_bid': [],
                'SM':
                's_C_DRKHP7_N001_d',
                'bait_set':
                'IMPACT468_BAITS',
                'sample_id':
                '10075_D_2',
                'patient_id':
                'C-DRKHP7',
                'request_id':
                '10075_D',
                'run_id': ['JAX_0397'],
                "preservation_type": ["Frozen"],
                'run_date': ['2019-12-12'],
                'species':
                'Human',
                'specimen_type':
                'Blood',
                'tumor_type':
                'Normal',
                'pi':
                'John Smith',
                'pi_email':
                '*****@*****.**'
            }],
            'tumor': [{
                'bait_set': 'IMPACT468_BAITS',
                'sample_id': '10075_D_1',
                'SM': '10075_D_1',
                'patient_id': 'C-DRKHP7',
                'run_id': ['JAX_0397'],
                "preservation_type": ["Frozen"],
                'tumor_type': 'Tumor'
            }]
        }

        print("Running test_compile_pairs5 ----")
        print(json.dumps(pairs, cls=UUIDEncoder))
        print(json.dumps(expected_pairs, cls=UUIDEncoder))
        self.assertTrue(pairs == expected_pairs)
Пример #8
0
    def test_compile_pairs4(self):
        """
        Test pairing with only a single unpaired Tumor sample
        Test that the appropriate Normal sample is found from the other samples in the same request
        missing normal for sample 10075_D_1; querying patient C-DRKHP7
        """
        # Load fixtures:
        test_files_fixture = os.path.join(settings.TEST_FIXTURE_DIR,
                                          "10075_D.file.json")
        call_command('loaddata', test_files_fixture, verbosity=0)
        test_files_fixture = os.path.join(settings.TEST_FIXTURE_DIR,
                                          "10075_D.filemetadata.json")
        call_command('loaddata', test_files_fixture, verbosity=0)

        samples = [{
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-DRKHP7",
            "run_id": ["JAX_0397"],
            "preservation_type": ["Frozen"],
            "tumor_type": "Tumor",
            "SM": "10075_D_1",
            "sample_id": "10075_D_1"
        }]
        pairs = compile_pairs(samples)
        expected_pairs = {
            'normal': [{
                'CN':
                'MSKCC',
                'ID': ['s_C_DRKHP7_N001_d_HCYYWBBXY'],
                'LB':
                '10075_D_2',
                'PL':
                'Illumina',
                'PU': ['HCYYWBBXY'],
                'R1': [
                    '/ifs/archive/GCL/hiseq/FASTQ/JAX_0397_BHCYYWBBXY/Project_10075_D/Sample_31-N_IGO_10075_D_2/31-N_IGO_10075_D_2_S14_R1_001.fastq.gz'
                ],
                'R1_bid': [UUID('40a07e9a-2198-40b7-9f7f-7696c9d6429e')],
                'R2': [
                    '/ifs/archive/GCL/hiseq/FASTQ/JAX_0397_BHCYYWBBXY/Project_10075_D/Sample_31-N_IGO_10075_D_2/31-N_IGO_10075_D_2_S14_R2_001.fastq.gz'
                ],
                'R2_bid': [UUID('bb7ff922-b741-4df7-ba2a-4f3b8549e8b5')],
                'bam': [],
                'bam_bid': [],
                'SM':
                's_C_DRKHP7_N001_d',
                'bait_set':
                'IMPACT468_BAITS',
                'sample_id':
                '10075_D_2',
                'patient_id':
                'C-DRKHP7',
                'request_id':
                '10075_D',
                'run_id': ['JAX_0397'],
                "preservation_type": ["Frozen"],
                'run_date': ['2019-12-12'],
                'species':
                'Human',
                'specimen_type':
                'Blood',
                'tumor_type':
                'Normal',
                'pi':
                'John Smith',
                'pi_email':
                '*****@*****.**'
            }],
            'tumor': [{
                'bait_set': 'IMPACT468_BAITS',
                'sample_id': '10075_D_1',
                'SM': '10075_D_1',
                'patient_id': 'C-DRKHP7',
                "run_id": ["JAX_0397"],
                "preservation_type": ["Frozen"],
                'tumor_type': 'Tumor'
            }]
        }

        print("Running test_compile_pairs4 ----")
        print(json.dumps(pairs, cls=UUIDEncoder))
        print(json.dumps(expected_pairs, cls=UUIDEncoder))
        self.assertTrue(pairs == expected_pairs)
Пример #9
0
    def test_compile_pairs2(self):
        """
        Test pairing with multiple samples in a request
        """
        samples = [{
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-DRKHP7",
            "run_id": ["JAX_0397"],
            "preservation_type": ["Frozen"],
            "tumor_type": "Normal",
            "SM": "my_sample_id1",
            "sample_id": "my_sample_id1"
        }, {
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-8VK0V7",
            "run_id": ["JAX_0397"],
            "preservation_type": ["Frozen"],
            "tumor_type": "Normal",
            "SM": "my_sample_id2",
            "sample_id": "my_sample_id2"
        }, {
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-DRKHP7",
            "run_id": ["JAX_0397"],
            "preservation_type": ["Frozen"],
            "tumor_type": "Tumor",
            "SM": "my_sample_id3",
            "sample_id": "my_sample_id3"
        }, {
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-8VK0V7",
            "run_id": ["JAX_0397"],
            "preservation_type": ["Frozen"],
            "tumor_type": "Tumor",
            "SM": "my_sample_id4",
            "sample_id": "my_sample_id4"
        }, {
            "bait_set": "IMPACT468_BAITS",
            "patient_id": "C-DRKHP7",
            "run_id": ["JAX_0397"],
            "preservation_type": ["Frozen"],
            "tumor_type": "Tumor",
            "SM": "my_sample_id5",
            "sample_id": "my_sample_id5"
        }]
        pairs = compile_pairs(samples)
        expected_pairs = {
            'tumor': [{
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-DRKHP7',
                'tumor_type': 'Tumor',
                "run_id": ["JAX_0397"],
                "preservation_type": ["Frozen"],
                "sample_id": "my_sample_id3",
                "SM": "my_sample_id3"
            }, {
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-8VK0V7',
                'tumor_type': 'Tumor',
                "run_id": ["JAX_0397"],
                "preservation_type": ["Frozen"],
                "sample_id": "my_sample_id4",
                "SM": "my_sample_id4"
            }, {
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-DRKHP7',
                'tumor_type': 'Tumor',
                "run_id": ["JAX_0397"],
                "preservation_type": ["Frozen"],
                "sample_id": "my_sample_id5",
                "SM": "my_sample_id5"
            }],
            'normal': [{
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-DRKHP7',
                'tumor_type': 'Normal',
                "run_id": ["JAX_0397"],
                "preservation_type": ["Frozen"],
                "sample_id": "my_sample_id1",
                "SM": "my_sample_id1"
            }, {
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-8VK0V7',
                'tumor_type': 'Normal',
                "run_id": ["JAX_0397"],
                "preservation_type": ["Frozen"],
                "sample_id": "my_sample_id2",
                "SM": "my_sample_id2"
            }, {
                'bait_set': 'IMPACT468_BAITS',
                'patient_id': 'C-DRKHP7',
                'tumor_type': 'Normal',
                "run_id": ["JAX_0397"],
                "preservation_type": ["Frozen"],
                "sample_id": "my_sample_id1",
                "SM": "my_sample_id1"
            }]
        }

        print("Running test_compile_pairs2 ---")
        print(json.dumps(pairs, cls=UUIDEncoder))
        print(json.dumps(expected_pairs, cls=UUIDEncoder))

        self.assertTrue(pairs == expected_pairs)
Пример #10
0
    def test_compile_pairs_custom1(self):
        """
        Test the ability to compile pairs with a custom sample set

        TODO: what pair attributes is this testing?
        """
        samples = [{
            'CN':
            'MSKCC',
            'ID': ['s_juno_argos_demo1_3_HCYYWBBXY'],
            'LB':
            'juno_argos_demo1_3',
            'PL':
            'Illumina',
            'PU': ['HCYYWBBXY'],
            'R1': [
                '/juno/work/ci/argos-test/data/fastq/DU874145-N/DU874145-N_IGO_00000_TEST_L001_R1_001.fastq.gz'
            ],
            'R1_bid': [UUID('a46c5e6b-0793-4cd2-b5dd-92b3d71cf1ac')],
            'R2': [
                '/juno/work/ci/argos-test/data/fastq/DU874145-N/DU874145-N_IGO_00000_TEST_L001_R2_001.fastq.gz'
            ],
            'R2_bid': [UUID('c71c259a-ebc0-4490-9af1-bc99387a70d7')],
            'bam': [],
            'bam_bid': [],
            'SM':
            's_juno_argos_demo1_3',
            'bait_set':
            'IMPACT468_BAITS',
            'sample_id':
            's_juno_argos_demo1_3',
            'patient_id':
            'DU874145',
            'request_id':
            'juno_argos_demo1',
            'run_id': ['JAX_0397'],
            "preservation_type": ["EDTA-Streck"],
            'run_date': ['2019-12-12'],
            'species':
            'Human',
            'specimen_type':
            'Blood',
            'tumor_type':
            'Normal'
        }, {
            'CN':
            'MSKCC',
            'ID': ['s_juno_argos_demo1_5_HFTCNBBXY_GTATTGGC-TTGTCGGT'],
            'LB':
            'juno_argos_demo1_5_1_1_1',
            'PL':
            'Illumina',
            'PU': ['HFTCNBBXY_GTATTGGC-TTGTCGGT'],
            'R1': [
                '/juno/work/ci/argos-test/data/fastq/DU874145-T/DU874145-T_IGO_00000_TEST_L001_R1_001.fastq.gz'
            ],
            'R1_bid': [UUID('d2d8ed36-d8f4-4e93-b038-d38328fad021')],
            'R2': [
                '/juno/work/ci/argos-test/data/fastq/DU874145-T/DU874145-T_IGO_00000_TEST_L001_R2_001.fastq.gz'
            ],
            'R2_bid': [UUID('2f77f3ac-ab25-4a02-90bd-86542401ac89')],
            'bam': [],
            'bam_bid': [],
            'SM':
            's_juno_argos_demo1_5',
            'bait_set':
            'IMPACT468_BAITS',
            'sample_id':
            's_juno_argos_demo1_5',
            'patient_id':
            'DU874145',
            'request_id':
            'juno_argos_demo1',
            'run_id': ['JAX_0397'],
            "preservation_type": ["EDTA-Streck"],
            'run_date': ['2019-12-17'],
            'species':
            'Human',
            'specimen_type':
            'Resection',
            'tumor_type':
            'Tumor'
        }]
        pairs = compile_pairs(samples)

        expected_pairs = {
            'tumor': [{
                'CN':
                'MSKCC',
                'ID': ['s_juno_argos_demo1_5_HFTCNBBXY_GTATTGGC-TTGTCGGT'],
                'LB':
                'juno_argos_demo1_5_1_1_1',
                'PL':
                'Illumina',
                'PU': ['HFTCNBBXY_GTATTGGC-TTGTCGGT'],
                'R1': [
                    '/juno/work/ci/argos-test/data/fastq/DU874145-T/DU874145-T_IGO_00000_TEST_L001_R1_001.fastq.gz'
                ],
                'R1_bid': [UUID('d2d8ed36-d8f4-4e93-b038-d38328fad021')],
                'R2': [
                    '/juno/work/ci/argos-test/data/fastq/DU874145-T/DU874145-T_IGO_00000_TEST_L001_R2_001.fastq.gz'
                ],
                'R2_bid': [UUID('2f77f3ac-ab25-4a02-90bd-86542401ac89')],
                'SM':
                's_juno_argos_demo1_5',
                'bait_set':
                'IMPACT468_BAITS',
                'sample_id':
                's_juno_argos_demo1_5',
                'patient_id':
                'DU874145',
                'request_id':
                'juno_argos_demo1',
                'run_id': ['JAX_0397'],
                'run_date': ['2019-12-17'],
                'species':
                'Human',
                'specimen_type':
                'Resection',
                'tumor_type':
                'Tumor',
                "preservation_type": ["EDTA-Streck"],
                'bam': [],
                'bam_bid': []
            }],
            'normal': [{
                'CN':
                'MSKCC',
                'ID': ['s_juno_argos_demo1_3_HCYYWBBXY'],
                'LB':
                'juno_argos_demo1_3',
                'PL':
                'Illumina',
                'PU': ['HCYYWBBXY'],
                'R1': [
                    '/juno/work/ci/argos-test/data/fastq/DU874145-N/DU874145-N_IGO_00000_TEST_L001_R1_001.fastq.gz'
                ],
                'R1_bid': [UUID('a46c5e6b-0793-4cd2-b5dd-92b3d71cf1ac')],
                'R2': [
                    '/juno/work/ci/argos-test/data/fastq/DU874145-N/DU874145-N_IGO_00000_TEST_L001_R2_001.fastq.gz'
                ],
                'R2_bid': [UUID('c71c259a-ebc0-4490-9af1-bc99387a70d7')],
                'SM':
                's_juno_argos_demo1_3',
                'bait_set':
                'IMPACT468_BAITS',
                'sample_id':
                's_juno_argos_demo1_3',
                'patient_id':
                'DU874145',
                'request_id':
                'juno_argos_demo1',
                'run_id': ['JAX_0397'],
                'run_date': ['2019-12-12'],
                'species':
                'Human',
                'specimen_type':
                'Blood',
                'tumor_type':
                'Normal',
                "preservation_type": ["EDTA-Streck"],
                'bam': [],
                'bam_bid': []
            }]
        }

        print("Running test_compile_pairs_custom1 ----")
        print(json.dumps(pairs, cls=UUIDEncoder))
        print(json.dumps(expected_pairs, cls=UUIDEncoder))

        self.assertTrue(pairs == expected_pairs)