def create_ref_genome_from_browser_upload(request):
    """Handle request to create ReferenceGenome from a browser-uploaded file.

    Expects POST params 'projectUid', 'refGenomeLabel', 'importFileFormat'
    and the upload itself under FILES['refGenomeFile'].

    Returns a JSON HttpResponse whose 'error' key is '' on success, or the
    exception message on failure.
    """
    project = get_object_or_404(Project, owner=request.user.get_profile(),
            uid=request.POST['projectUid'])

    uploaded_file = request.FILES['refGenomeFile']

    # Save uploaded ReferenceGenome to temp file, passing the original
    # filename as the suffix for easier debug.
    if not os.path.exists(settings.TEMP_FILE_ROOT):
        os.mkdir(settings.TEMP_FILE_ROOT)
    temp_fd, temp_file_location = tempfile.mkstemp(
            suffix='_' + uploaded_file.name,
            dir=settings.TEMP_FILE_ROOT)
    # Fixes vs. previous version: mkstemp()'s file descriptor was discarded
    # (leaked); the upload was written in text mode ('w'), which can corrupt
    # binary payloads via newline translation; and .read() pulled the whole
    # file into memory. os.fdopen() takes ownership of the descriptor, and
    # chunks() streams the upload.
    with os.fdopen(temp_fd, 'wb') as temp_file_fh:
        for chunk in uploaded_file.chunks():
            temp_file_fh.write(chunk)

    error_string = ''
    try:
        import_reference_genome_from_local_file(
                project,
                request.POST['refGenomeLabel'],
                temp_file_location,
                request.POST['importFileFormat'],
                move=True)
    except Exception as e:
        # Report the failure to the client as JSON rather than a 500.
        error_string = str(e)

    result = {
        'error': error_string,
    }
    return HttpResponse(json.dumps(result), content_type='application/json')
def test_import_reference_genome_from_local_file(self):
    """Importing a reference genome from a genbank file succeeds."""
    genbank_path = os.path.join(
            settings.PWD, 'test_data', 'import_util_test_data',
            'mini_mg1655.genbank')
    import_reference_genome_from_local_file(
            self.project, 'a label', genbank_path, 'genbank')
def test_import_reference_genome_from_local_file__fail_if_no_seq(self):
    """A genbank file without sequence data raises DataImportError."""
    no_seq_genbank_path = os.path.join(
            settings.PWD, 'test_data', 'import_util_test_data',
            'mg1655_no_seq.genbank')
    with self.assertRaises(DataImportError):
        import_reference_genome_from_local_file(
                self.project, 'a label', no_seq_genbank_path, 'genbank')
def setUp(self):
    """Create user/project, two reference genomes, and fixture paths."""
    user = User.objects.create_user(
            'test_username', password='******', email='*****@*****.**')

    # Grab a project.
    self.project = Project.objects.create(
            title='test project', owner=user.get_profile())

    # Create a ref genome.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

    # Variant positions baked into the test genome: 30 one-indexed
    # positions starting at 800, spaced 20 apart.
    self.EXPECTED_NUM_VARIANTS = 30
    self.EXPECTED_VARIANT_POSITIONS = [
            800 + 20 * i for i in range(self.EXPECTED_NUM_VARIANTS)]

    # Fixture file locations.
    self.KNOWN_SUBSTITUTIONS_ROOT = os.path.join(
            settings.PWD, 'test_data', 'test_genome_known_substitutions')
    self.TEST_GENOME_FASTA = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'test_genome_known_substitutions.fa')
    self.FAKE_READS_FASTQ1 = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'test_genome_known_substitutions_0.snps.simLibrary.1.fq')
    self.FAKE_READS_FASTQ2 = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'test_genome_known_substitutions_0.snps.simLibrary.2.fq')
    self.FAKE_READS_SAMPLE_UID = '93b68da4'
    self.FAKE_READS_BAM = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'bwa_align.sorted.grouped.realigned.bam')
    self.FAKE_READS_BAM_INDEX = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'bwa_align.sorted.grouped.realigned.bam.bai')

    # Create a ref genome from the above.
    self.REFERENCE_GENOME = import_reference_genome_from_local_file(
            self.project, 'test_genome', self.TEST_GENOME_FASTA, 'fasta')
def setUp(self):
    """Create a project, a reference genome, and three samples with
    varying fastq configurations.
    """
    user = User.objects.create_user(
            TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)
    self.project = Project.objects.create(
            owner=user.get_profile(), title='Test Project')

    # Create a ref genome.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

    def _attach_fastq(sample, dataset_type, fastq_path):
        # Local helper to cut down on repetition below.
        copy_and_add_dataset_source(
                sample, dataset_type, dataset_type, fastq_path)

    # Sample with a paired-end fastq pair.
    self.experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')
    _attach_fastq(self.experiment_sample, Dataset.TYPE.FASTQ1, TEST_FASTQ1)
    _attach_fastq(self.experiment_sample, Dataset.TYPE.FASTQ2, TEST_FASTQ2)

    # Second paired-end sample.
    self.experiment_sample_2 = ExperimentSample.objects.create(
            project=self.project, label='sample2')
    _attach_fastq(
            self.experiment_sample_2, Dataset.TYPE.FASTQ1,
            TEST_SAMPLE_2_FASTQ1)
    _attach_fastq(
            self.experiment_sample_2, Dataset.TYPE.FASTQ2,
            TEST_SAMPLE_2_FASTQ2)

    # Sample with only a single fastq.
    self.experiment_sample_single_fastq = ExperimentSample.objects.create(
            project=self.project, label='sample_single_fastq')
    _attach_fastq(
            self.experiment_sample_single_fastq, Dataset.TYPE.FASTQ1,
            TEST_FASTQ1)
def test_get_insert_size__generated_data(self):
    """Insert-size stats computed from a generated inversion dataset."""
    inversion_dir = os.path.join(
            TEST_DATA_DIR, 'sv_testing', 'inversion_5a996d78')
    ref_fasta = os.path.join(inversion_dir, 'small_ref.fa')
    sample_uid = 'group'
    sample_bam = os.path.join(inversion_dir, 'inversion_5a996d78.bam')

    reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', ref_fasta, 'fasta')
    alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=reference_genome)
    creation_result = create_sample_and_alignment(
            self.project, alignment_group, sample_uid, sample_bam)
    sample_alignment = creation_result['sample_alignment']

    mean, stdev = get_insert_size_mean_and_stdev(sample_alignment)
    # Values baked into the simulated library.
    self.assertAlmostEqual(mean, 498, delta=2)
    self.assertAlmostEqual(stdev, 1, delta=1)
def _import_reference(self, ref_path):
    """Import the fasta at ref_path as a test reference genome.

    move=False leaves the source file in place.
    """
    return import_reference_genome_from_local_file(
            self.project, 'test_ref', ref_path, 'fasta', move=False)
def test_compress_dataset(self):
    """Compressing a dataset creates a new Dataset entry in the db that is
    reachable through the owning ReferenceGenome.
    """
    user = User.objects.create_user(TEST_USERNAME, password=TEST_PASSWORD,
            email=TEST_EMAIL)
    self.test_project = Project.objects.create(title=TEST_PROJECT_NAME,
            owner=user.get_profile())
    self.test_ref_genome = import_reference_genome_from_local_file(
            self.test_project,
            TEST_REF_GENOME_NAME,
            TEST_REF_GENOME_PATH,
            'genbank')

    dataset = get_dataset_with_type(
            self.test_ref_genome,
            type=Dataset.TYPE.REFERENCE_GENOME_GENBANK)

    # All the magic happens here.
    compressed_dataset = dataset.make_compressed('.gz')

    # Grab the new compressed dataset through the ref genome to make sure
    # that it got added.
    compressed_dataset_through_ref_genome = get_dataset_with_type(
            entity=self.test_ref_genome,
            type=Dataset.TYPE.REFERENCE_GENOME_GENBANK,
            compressed=True)

    # Fix: use assertEqual instead of a bare `assert`, which is stripped
    # under `python -O` and gives no diagnostic on failure.
    self.assertEqual(compressed_dataset,
            compressed_dataset_through_ref_genome)
def setUp(self):
    """Create the model chain needed for snpeff tests: project, ref
    genome, alignment group, sample, sample alignment, and an unannotated
    freebayes vcf dataset.
    """
    user = User.objects.create_user(
            'test_username', password='******', email='*****@*****.**')

    # Grab a project.
    self.project = Project.objects.create(
            title='snpeff test project', owner=user.get_profile())

    # Create a ref genome.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'snpeff test ref genome', TEST_GENBANK,
            'genbank')

    # Create a new alignment group.
    self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment',
            reference_genome=self.reference_genome)

    # Create a sample.
    self.sample_1 = ExperimentSample.objects.create(
            project=self.project, label='test sample 1')

    # Create relationship between alignment and sample.
    self.sample_alignment = ExperimentSampleToAlignment.objects.create(
            alignment_group=self.alignment_group,
            experiment_sample=self.sample_1)

    # Add unannotated SNP data.
    self.vcf_dataset = Dataset.objects.create(
            type=Dataset.TYPE.VCF_FREEBAYES,
            label=Dataset.TYPE.VCF_FREEBAYES,
            filesystem_location=TEST_UNANNOTATED_VCF)
    self.alignment_group.dataset_set.add(self.vcf_dataset)
def setUp(self):
    """Build project, ref genome, alignment group, sample, and attach an
    unannotated vcf dataset of type VCF_DATASET_TYPE.
    """
    user = User.objects.create_user(
            'test_username', password='******', email='*****@*****.**')

    # Project owned by the test user.
    self.project = Project.objects.create(
            title='snpeff test project', owner=user.get_profile())

    # Reference genome imported from the test genbank.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'snpeff test ref genome', TEST_GENBANK,
            'genbank')

    # Alignment group over that reference.
    self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment',
            reference_genome=self.reference_genome)

    # One sample, linked to the alignment group.
    self.sample_1 = ExperimentSample.objects.create(
            project=self.project, label='test sample 1')
    self.sample_alignment = ExperimentSampleToAlignment.objects.create(
            alignment_group=self.alignment_group,
            experiment_sample=self.sample_1)

    # Unannotated SNP data to be annotated by the test.
    self.vcf_dataset = Dataset.objects.create(
            type=VCF_DATASET_TYPE,
            label=VCF_DATASET_TYPE,
            filesystem_location=TEST_UNANNOTATED_VCF)
    self.alignment_group.dataset_set.add(self.vcf_dataset)
def test_run_pipeline__snps_with_effect__no_svs(self):
    """Tests pipeline with SNPs with effect, but no SVs called."""
    ref_genome = import_reference_genome_from_local_file(
            self.project,
            'mg1655_tolC_through_zupT',
            FullVCFTestSet.TEST_GENBANK,
            'genbank')

    sample_obj = ExperimentSample.objects.create(
            project=self.project, label='Sample %d' % 0)

    # Add raw reads to the sample.
    copy_and_add_dataset_source(
            sample_obj, Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1,
            FullVCFTestSet.FASTQ1[0])
    copy_and_add_dataset_source(
            sample_obj, Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2,
            FullVCFTestSet.FASTQ2[0])

    # Run the pipeline and block until both async phases finish.
    (alignment_group, alignment_async_result,
            variant_calling_async_result) = run_pipeline(
                    'test_align', ref_genome, [sample_obj])
    alignment_async_result.get()
    variant_calling_async_result.get()

    # Reload to pick up the status written by the worker.
    alignment_group = AlignmentGroup.objects.get(uid=alignment_group.uid)
    self.assertEqual(AlignmentGroup.STATUS.COMPLETED,
            alignment_group.status)

    # Check that SnpEff worked.
    v_205 = Variant.objects.get(
            reference_genome=alignment_group.reference_genome,
            position=205)
    v_205_va = v_205.variantalternate_set.all()[0]
    self.assertEqual('tolC', v_205_va.data['INFO_EFF_GENE'])
def setUp(self):
    """Create a project, ref genome, and two samples: one with plain
    fastqs and one with gzipped fastqs.
    """
    user = User.objects.create_user(
            TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)
    self.project = Project.objects.create(
            owner=user.get_profile(), title='Test Project')

    # Create a ref genome.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

    # Sample for uncompressed fastq data.
    self.experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

    # Sample for compressed fastq data.
    # NOTE(review): shares the label 'sample1' with the sample above --
    # presumably intentional; confirm labels need not be unique.
    self.compressed_experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

    # Attach the uncompressed fastq pair.
    copy_and_add_dataset_source(
            self.experiment_sample, Dataset.TYPE.FASTQ1,
            Dataset.TYPE.FASTQ1, TEST_FASTQ1)
    copy_and_add_dataset_source(
            self.experiment_sample, Dataset.TYPE.FASTQ2,
            Dataset.TYPE.FASTQ2, TEST_FASTQ2)

    # Attach the gzipped fastq pair.
    copy_and_add_dataset_source(
            self.compressed_experiment_sample, Dataset.TYPE.FASTQ1,
            Dataset.TYPE.FASTQ1, TEST_FASTQ1_GZ)
    copy_and_add_dataset_source(
            self.compressed_experiment_sample, Dataset.TYPE.FASTQ2,
            Dataset.TYPE.FASTQ2, TEST_FASTQ2_GZ)
def setUp(self):
    """Import the deletion test genome and a sample with paired reads."""
    user = User.objects.create_user(
            'test_username_sv', password='******',
            email='*****@*****.**')

    # Grab a project.
    self.project = Project.objects.create(
            title='test project', owner=user.get_profile())

    # Use genome with deletion from our sv testing repo:
    # https://github.com/churchlab/structural-variants-testing
    deletion_test_dir = os.path.join(
            settings.PWD, 'test_data', 'sv_testing', 'deletion_bd5a1123')
    ref_fasta = os.path.join(deletion_test_dir, 'small_ref.fa')
    fastq_1 = os.path.join(deletion_test_dir, 'deletion_bd5a1123.1.fq')
    fastq_2 = os.path.join(deletion_test_dir, 'deletion_bd5a1123.2.fq')

    # Create Datasets / import data.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', ref_fasta, 'fasta')
    self.experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')
    copy_and_add_dataset_source(
            self.experiment_sample, Dataset.TYPE.FASTQ1,
            Dataset.TYPE.FASTQ1, fastq_1)
    copy_and_add_dataset_source(
            self.experiment_sample, Dataset.TYPE.FASTQ2,
            Dataset.TYPE.FASTQ2, fastq_2)
def test_compress_dataset(self):
    """Compressing a dataset creates a new Dataset entry in the db that is
    reachable through the owning ReferenceGenome.
    """
    user = User.objects.create_user(TEST_USERNAME, password=TEST_PASSWORD,
            email=TEST_EMAIL)
    self.test_project = Project.objects.create(
            title=TEST_PROJECT_NAME, owner=user.get_profile())
    self.test_ref_genome = import_reference_genome_from_local_file(
            self.test_project,
            TEST_REF_GENOME_NAME,
            TEST_REF_GENOME_PATH,
            'genbank')

    # Normalized keyword spacing (was `type= Dataset...`) per PEP 8.
    dataset = get_dataset_with_type(
            self.test_ref_genome,
            type=Dataset.TYPE.REFERENCE_GENOME_GENBANK)

    # All the magic happens here.
    compressed_dataset = dataset.make_compressed('.gz')

    # Grab the new compressed dataset through the ref genome to make sure
    # that it got added.
    compressed_dataset_through_ref_genome = get_dataset_with_type(
            entity=self.test_ref_genome,
            type=Dataset.TYPE.REFERENCE_GENOME_GENBANK,
            compressed=True)

    # Fix: use assertEqual instead of a bare `assert`, which is stripped
    # under `python -O` and gives no diagnostic on failure.
    self.assertEqual(compressed_dataset,
            compressed_dataset_through_ref_genome)
def test_get_insert_size__generated_data(self):
    """Mean/stdev of insert size recovered from the inversion test bam."""
    data_dir = os.path.join(
            TEST_DATA_DIR, 'sv_testing', 'inversion_5a996d78')
    ref_path = os.path.join(data_dir, 'small_ref.fa')
    bam_path = os.path.join(data_dir, 'inversion_5a996d78.bam')
    sample_uid = 'group'

    reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', ref_path, 'fasta')
    alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=reference_genome)
    sample_alignment = create_sample_and_alignment(
            self.project, alignment_group, sample_uid,
            bam_path)['sample_alignment']

    mean, stdev = get_insert_size_mean_and_stdev(sample_alignment)
    # Expected values are fixed by the simulated library.
    self.assertAlmostEqual(mean, 498, delta=2)
    self.assertAlmostEqual(stdev, 1, delta=1)
def test_run_lumpy__multiple_samples_of_same_exact_deletion(self):
    """Tests lumpy running on multiple samples."""
    # Create Datasets / import data.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', DELETION_REF, 'fasta')

    # Create an alignment that's already complete, so we can focus on
    # testing variant calling only.
    self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

    # Build the four sample alignments first (same order as before).
    uid_bam_pairs = (
            (DELETION_SAMPLE_1_UID, DELETION_SAMPLE_1_BWA),
            (DELETION_SAMPLE_2_UID, DELETION_SAMPLE_2_BWA),
            (DELETION_SAMPLE_3_UID, DELETION_SAMPLE_3_BWA),
            (DELETION_SAMPLE_4_UID, DELETION_SAMPLE_4_BWA))
    sample_alignments = []
    for uid, bam in uid_bam_pairs:
        outcome = _create_sample_and_alignment(
                self.project, self.alignment_group, uid, bam)
        sample_alignments.append(outcome['sample_alignment'])

    # Common params for each run of lumpy.
    lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])

    # Run lumpy once per sample alignment.
    for sample_alignment in sample_alignments:
        lumpy_params['tool_kwargs'] = {
            'region_num': sample_alignment.uid,
            'sample_alignments': [sample_alignment]
        }
        find_variants_with_tool(
                self.alignment_group, lumpy_params, project=self.project)

    merge_lumpy_vcf(self.alignment_group)

    # The four samples share the same two underlying deletion events.
    variants = Variant.objects.filter(
            reference_genome=self.reference_genome)
    self.assertEqual(2, len(variants))
def setUp(self):
    """Build the minimal test models: user, project, reference genome."""
    user = User.objects.create_user(
            TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)
    self.project = Project.objects.create(
            owner=user.get_profile(), title='Test Project')
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')
def test_run_lumpy__multiple_samples_of_same_exact_deletion(self):
    """Tests lumpy running on multiple samples."""
    # Create Datasets / import data.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', DELETION_REF, 'fasta')

    # Create an alignment that's already complete, so we can focus on
    # testing variant calling only.
    self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

    # Create all four sample alignments up front, preserving order.
    sample_specs = [
            (DELETION_SAMPLE_1_UID, DELETION_SAMPLE_1_BWA),
            (DELETION_SAMPLE_2_UID, DELETION_SAMPLE_2_BWA),
            (DELETION_SAMPLE_3_UID, DELETION_SAMPLE_3_BWA),
            (DELETION_SAMPLE_4_UID, DELETION_SAMPLE_4_BWA),
    ]
    sample_alignments = [
            _create_sample_and_alignment(
                    self.project, self.alignment_group, uid,
                    bam)['sample_alignment']
            for uid, bam in sample_specs]

    # Common params for each run of lumpy.
    lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])

    # Run lumpy for each sample alignment in turn.
    for sa in sample_alignments:
        lumpy_params['tool_kwargs'] = {
            'region_num': sa.uid,
            'sample_alignments': [sa]
        }
        find_variants_with_tool(
                self.alignment_group, lumpy_params, project=self.project)

    merge_lumpy_vcf(self.alignment_group)

    # Grab the resulting variants; the same deletion across four samples
    # should merge to 2 events.
    variants = Variant.objects.filter(
            reference_genome=self.reference_genome)
    self.assertEqual(2, len(variants))
def setUp(self):
    """Create project, ref genome, two samples with fastq data, and a
    READY bwa alignment backed by the disc/split test bam.
    """
    user = User.objects.create_user(TEST_USERNAME, password=TEST_PASSWORD,
            email=TEST_EMAIL)
    self.project = Project.objects.create(owner=user.get_profile(),
            title='Test Project')

    # Create a ref genome.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

    # Create a sample.
    self.experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

    # Create a sample for compressed fastq data.
    # NOTE(review): shares the label 'sample1' with the sample above --
    # confirm the duplicate label is intentional.
    self.compressed_experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

    # Add fastq files to first sample.
    copy_and_add_dataset_source(self.experiment_sample,
            Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1, TEST_FASTQ1)
    copy_and_add_dataset_source(self.experiment_sample,
            Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2, TEST_FASTQ2)

    # Add compressed fastq files to second sample.
    copy_and_add_dataset_source(self.compressed_experiment_sample,
            Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1, TEST_FASTQ1_GZ)
    copy_and_add_dataset_source(self.compressed_experiment_sample,
            Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2, TEST_FASTQ2_GZ)

    # Create a new alignment group.
    alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

    # Create the expected models.
    sample_alignment = ExperimentSampleToAlignment.objects.create(
            alignment_group=alignment_group,
            experiment_sample=self.experiment_sample)
    bwa_dataset = copy_and_add_dataset_source(
            sample_alignment,
            dataset_label=Dataset.TYPE.BWA_ALIGN,
            dataset_type=Dataset.TYPE.BWA_ALIGN,
            original_source_location=TEST_DISC_SPLIT_BAM)
    # Fix: `bwa_dataset.status = status = ...` chained assignment created
    # a pointless local variable `status`; a single assignment suffices.
    bwa_dataset.status = Dataset.STATUS.READY
    bwa_dataset.save()

    index_bam_file(bwa_dataset.get_absolute_location())

    self.bwa_dataset = bwa_dataset
    self.sample_alignment = sample_alignment
def setUp(self):
    """Create user/project, two reference genomes, and the known-
    substitutions fixture paths.
    """
    user = User.objects.create_user(
            'test_username', password='******', email='*****@*****.**')

    # Grab a project.
    self.project = Project.objects.create(
            title='test project', owner=user.get_profile())

    # Create a ref genome.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

    # One-indexed positions planted in the test genome: 800, 820, ...
    # (30 positions spaced 20 apart).
    self.EXPECTED_NUM_VARIANTS = 30
    self.EXPECTED_VARIANT_POSITIONS = [
            800 + 20 * k for k in range(self.EXPECTED_NUM_VARIANTS)]

    self.KNOWN_SUBSTITUTIONS_ROOT = os.path.join(
            settings.PWD, 'test_data', 'test_genome_known_substitutions')
    self.TEST_GENOME_FASTA = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'test_genome_known_substitutions.fa')
    self.FAKE_READS_FASTQ1 = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'test_genome_known_substitutions_0.snps.simLibrary.1.fq')
    self.FAKE_READS_FASTQ2 = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'test_genome_known_substitutions_0.snps.simLibrary.2.fq')
    self.FAKE_READS_SAMPLE_UID = '93b68da4'
    self.FAKE_READS_BAM = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'bwa_align.sorted.grouped.realigned.bam')
    self.FAKE_READS_BAM_INDEX = os.path.join(
            self.KNOWN_SUBSTITUTIONS_ROOT,
            'bwa_align.sorted.grouped.realigned.bam.bai')

    # Create a ref genome from the above.
    self.REFERENCE_GENOME = import_reference_genome_from_local_file(
            self.project, 'test_genome', self.TEST_GENOME_FASTA, 'fasta')
def setUp(self):
    """Import a fasta ref genome and read its sequence back from disk."""
    self.common_entities = create_common_entities()
    self.ref_genome = import_reference_genome_from_local_file(
            self.common_entities['project'], 'ref_genome', TEST_FASTA,
            'fasta')

    # Read the imported reference sequence back out of its Dataset.
    fasta_dataset = self.ref_genome.dataset_set.get(
            type=Dataset.TYPE.REFERENCE_GENOME_FASTA)
    with open(fasta_dataset.get_absolute_location()) as fh:
        self.ref_genome_seq_record = SeqIO.read(fh, 'fasta')
def test_basic(self):
    """End-to-end check of generate_new_reference_genome: plant G
    substitutions every 10 bases and verify the regenerated genome.
    """
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_GENBANK, 'genbank')
    variant_set = VariantSet.objects.create(
            reference_genome=self.reference_genome, label='vs1')

    # Read the original annotated sequence.
    ref_genome_filepath = get_dataset_with_type(
            self.reference_genome,
            Dataset.TYPE.REFERENCE_GENOME_GENBANK).get_absolute_location()
    with open(ref_genome_filepath) as fh:
        ref_genome_seq_record = SeqIO.read(fh, 'genbank')

    # Plant a G substitution every 10 bases in [10, 110].
    chromosome = Chromosome.objects.get(
            reference_genome=self.reference_genome)
    for position in range(10, 111, 10):
        variant = Variant.objects.create(
                type=Variant.TYPE.TRANSITION,
                reference_genome=self.reference_genome,
                chromosome=chromosome,
                position=position,
                ref_value=ref_genome_seq_record[position - 1])
        VariantAlternate.objects.create(variant=variant, alt_value='G')
        VariantToVariantSet.objects.create(
                variant=variant, variant_set=variant_set)

    new_ref_genome = generate_new_reference_genome(
            variant_set, {'label': 'new'})

    # Read back the regenerated sequence.
    new_ref_genome_filepath = get_dataset_with_type(
            new_ref_genome,
            Dataset.TYPE.REFERENCE_GENOME_GENBANK).get_absolute_location()
    with open(new_ref_genome_filepath) as fh:
        new_ref_genome_seq_record = SeqIO.read(fh, 'genbank')

    # Size unchanged.
    self.assertEqual(len(new_ref_genome_seq_record),
            len(ref_genome_seq_record))

    # Every planted mutation is present.
    for position in range(10, 111, 10):
        self.assertEqual(
                'G', str(new_ref_genome_seq_record[position - 1]))

    # New genome is annotated.
    self.assertTrue(new_ref_genome.is_annotated())
def setUp(self):
    """Create project, ref genome, two samples with fastq data, and a
    READY bwa alignment backed by the disc/split test bam.
    """
    user = User.objects.create_user(TEST_USERNAME, password=TEST_PASSWORD,
            email=TEST_EMAIL)
    self.project = Project.objects.create(owner=user.get_profile(),
            title='Test Project')

    # Create a ref genome.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

    # Create a sample.
    self.experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

    # Create a sample for compressed fastq data.
    # NOTE(review): shares the label 'sample1' with the sample above --
    # confirm the duplicate label is intentional.
    self.compressed_experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')

    # Add fastq files to first sample.
    copy_and_add_dataset_source(self.experiment_sample,
            Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1, TEST_FASTQ1)
    copy_and_add_dataset_source(self.experiment_sample,
            Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2, TEST_FASTQ2)

    # Add compressed fastq files to second sample.
    copy_and_add_dataset_source(self.compressed_experiment_sample,
            Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1, TEST_FASTQ1_GZ)
    copy_and_add_dataset_source(self.compressed_experiment_sample,
            Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2, TEST_FASTQ2_GZ)

    # Create a new alignment group.
    alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)

    # Create the expected models.
    sample_alignment = ExperimentSampleToAlignment.objects.create(
            alignment_group=alignment_group,
            experiment_sample=self.experiment_sample)
    bwa_dataset = copy_and_add_dataset_source(
            sample_alignment,
            dataset_label=Dataset.TYPE.BWA_ALIGN,
            dataset_type=Dataset.TYPE.BWA_ALIGN,
            original_source_location=TEST_DISC_SPLIT_BAM)
    # Fix: `bwa_dataset.status = status=...` chained assignment created a
    # pointless local variable `status`; a single assignment suffices.
    bwa_dataset.status = Dataset.STATUS.READY
    bwa_dataset.save()

    index_bam_file(bwa_dataset.get_absolute_location())

    self.bwa_dataset = bwa_dataset
    self.sample_alignment = sample_alignment
def setUp(self):
    """Minimal fixtures: user, project, and an imported fasta genome."""
    user = User.objects.create_user(
            'test_username', password='******', email='*****@*****.**')

    # Grab a project.
    self.project = Project.objects.create(
            title='test project', owner=user.get_profile())

    # Create a ref genome.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')
def create_ref_genome_from_server_location(request):
    """Handle request to create ReferenceGenome from a file already on the
    server filesystem.

    Returns a JSON HttpResponse whose 'error' key is '' on success, or the
    exception message on failure.
    """
    project = get_object_or_404(
            Project,
            owner=request.user.get_profile(),
            uid=request.POST['projectUid'])

    error_string = ''
    try:
        import_reference_genome_from_local_file(
                project,
                request.POST['refGenomeLabel'],
                request.POST['refGenomeFileLocation'],
                request.POST['importFileFormat'])
    except Exception as e:
        # Report the failure to the client as JSON rather than a 500.
        error_string = str(e)

    return HttpResponse(
            json.dumps({'error': error_string}),
            content_type='application/json')
def test_run_lumpy__deletion(self):
    """Tests running Lumpy on data that should have a deletion."""
    # Create Datasets / import data.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', DELETION_REF_GENBANK, 'genbank')

    # Create an alignment that's already complete, so we can focus on
    # testing variant calling only.
    self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)
    creation_result = _create_sample_and_alignment(
            self.project, self.alignment_group, DELETION_SAMPLE_1_UID,
            DELETION_SAMPLE_1_BWA)
    sample_alignment = creation_result['sample_alignment']

    # Run lumpy against just this sample alignment.
    lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])
    lumpy_params['tool_kwargs'] = {
        'region_num': sample_alignment.uid,
        'sample_alignments': [sample_alignment]
    }
    find_variants_with_tool(
            self.alignment_group, lumpy_params, project=self.project)
    merge_lumpy_vcf(self.alignment_group)

    # Grab the resulting variants.
    variants = Variant.objects.filter(
            reference_genome=self.reference_genome)

    # Expect a single deletion of size ~1000 around position 10000.
    self.assertEqual(1, len(variants))
    variant = variants[0]

    # position/ref
    self.assertTrue(9950 < variant.position < 10050)
    self.assertEqual(SV_REF_VALUE, variant.ref_value)

    # size
    vccd = variant.variantcallercommondata_set.all()[0]
    deletion_size = vccd.data['INFO_END'] - variant.position
    self.assertTrue(900 < deletion_size < 1100)

    # Type and SnpEff annotation.
    variant_alt = variant.variantalternate_set.all()[0]
    self.assertEqual('DEL', variant_alt.data['INFO_SVTYPE'])
    self.assertEqual('geneX', variant_alt.data['INFO_EFF_GENE'])
def test_basic(self):
    """Plant G substitutions every 10 bases into a variant set and check
    generate_new_reference_genome reproduces them.
    """
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_GENBANK, 'genbank')
    variant_set = VariantSet.objects.create(
            reference_genome=self.reference_genome, label='vs1')

    # Original annotated sequence.
    original_dataset = get_dataset_with_type(
            self.reference_genome, Dataset.TYPE.REFERENCE_GENOME_GENBANK)
    with open(original_dataset.get_absolute_location()) as fh:
        original_record = SeqIO.read(fh, 'genbank')

    # Create one TRANSITION->G variant per mutated position and add it to
    # the variant set.
    mutated_chromosome = Chromosome.objects.get(
            reference_genome=self.reference_genome)
    for position in range(10, 111, 10):
        new_variant = Variant.objects.create(
                type=Variant.TYPE.TRANSITION,
                reference_genome=self.reference_genome,
                chromosome=mutated_chromosome,
                position=position,
                ref_value=original_record[position - 1])
        VariantAlternate.objects.create(
                variant=new_variant, alt_value='G')
        VariantToVariantSet.objects.create(
                variant=new_variant, variant_set=variant_set)

    new_ref_genome = generate_new_reference_genome(
            variant_set, {'label': 'new'})

    # Regenerated sequence.
    new_dataset = get_dataset_with_type(
            new_ref_genome, Dataset.TYPE.REFERENCE_GENOME_GENBANK)
    with open(new_dataset.get_absolute_location()) as fh:
        new_record = SeqIO.read(fh, 'genbank')

    # Assert size unchanged.
    self.assertEqual(len(new_record), len(original_record))

    # Assert mutations are there.
    for position in range(10, 111, 10):
        self.assertEqual('G', str(new_record[position - 1]))

    # Assert new genome is annotated.
    self.assertTrue(new_ref_genome.is_annotated())
def test_generate_genbank_mobile_element_multifasta(self):
    """Test generation of the mobile element fasta."""
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_GENBANK, 'genbank')

    self.reference_genome.ensure_mobile_element_multifasta()

    # The mobile element dataset should now exist on disk.
    me_fa_dataset = get_dataset_with_type(
            self.reference_genome, Dataset.TYPE.MOBILE_ELEMENT_FASTA)
    # Fix: use assertTrue instead of a bare `assert`, which is stripped
    # under `python -O` and gives no diagnostic on failure.
    self.assertTrue(os.path.exists(me_fa_dataset.get_absolute_location()))
def test_run_pipeline__bad_alignment(self):
    """Alignment of bad reads.

    Might happen if user tries to align wrong reads to wrong reference
    genome.
    """
    ref_genome = import_reference_genome_from_local_file(
            self.project,
            'concat_mg1655_partials',
            FullVCFTestSet.TEST_CONCAT_GENBANK,
            'genbank')

    # NOTE: Ideally there would be a better way to test this. In general,
    # we need to figure out how to better communicate the reason for a
    # failed alignment to the user.
    with self.assertRaises(Exception):
        run_pipeline(
                'name_placeholder', ref_genome, [self.experiment_sample])
def test_run_lumpy__deletion(self):
    """Tests running Lumpy on data that should have a deletion."""
    # Create Datasets / import data.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', DELETION_REF_GENBANK, 'genbank')

    # Create an alignment that's already complete, so we can focus on
    # testing variant calling only.
    self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment', reference_genome=self.reference_genome)
    sample_alignment = _create_sample_and_alignment(
            self.project, self.alignment_group, DELETION_SAMPLE_1_UID,
            DELETION_SAMPLE_1_BWA)['sample_alignment']

    # Run lumpy against this single sample alignment.
    lumpy_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])
    lumpy_params['tool_kwargs'] = {
        'region_num': sample_alignment.uid,
        'sample_alignments': [sample_alignment]
    }
    find_variants_with_tool(
            self.alignment_group, lumpy_params, project=self.project)
    merge_lumpy_vcf(self.alignment_group)

    # Grab the resulting variants.
    variants = Variant.objects.filter(
            reference_genome=self.reference_genome)

    # Verify that we have the expected deletion around position 10000 of
    # size 1000.
    self.assertEqual(1, len(variants))
    deletion_variant = variants[0]

    # position/ref
    self.assertTrue(9950 < deletion_variant.position < 10050)
    self.assertEqual(SV_REF_VALUE, deletion_variant.ref_value)

    # size
    common_data = deletion_variant.variantcallercommondata_set.all()[0]
    event_size = common_data.data['INFO_END'] - deletion_variant.position
    self.assertTrue(900 < event_size < 1100)

    # Type
    alt = deletion_variant.variantalternate_set.all()[0]
    self.assertEqual('DEL', alt.data['INFO_SVTYPE'])
def setUp(self):
    """Create common entities, a fasta ref genome, and a paired-end
    sample.
    """
    entities = create_common_entities()
    self.project = entities['project']

    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')

    # Sample with a paired-end fastq pair attached.
    self.experiment_sample = ExperimentSample.objects.create(
            project=self.project, label='sample1')
    copy_and_add_dataset_source(
            self.experiment_sample, Dataset.TYPE.FASTQ1,
            Dataset.TYPE.FASTQ1, TEST_FASTQ1)
    copy_and_add_dataset_source(
            self.experiment_sample, Dataset.TYPE.FASTQ2,
            Dataset.TYPE.FASTQ2, TEST_FASTQ2)
def _perform_assembly(self, data_dict):
    """Run alignment plus de novo assembly for the given test data.

    data_dict keys: 'ref_fasta', 'fq_1', 'fq_2'. Returns the queryset of
    resulting Contig objects.
    """
    # Import reference genome (move=False keeps the source file in place).
    ref_genome = import_reference_genome_from_local_file(
            self.project, 'test_ref', data_dict['ref_fasta'], 'fasta',
            move=False)

    # Create sample model.
    sample = ExperimentSample.objects.create(
            project=self.project, label='test_sample')

    # Attach both fastq datasets to the sample.
    add_dataset_to_entity(
            sample, Dataset.TYPE.FASTQ1, Dataset.TYPE.FASTQ1,
            filesystem_location=data_dict['fq_1'])
    add_dataset_to_entity(
            sample, Dataset.TYPE.FASTQ2, Dataset.TYPE.FASTQ2,
            filesystem_location=data_dict['fq_2'])

    # Align the sample to the reference; variant calling is skipped since
    # the assembly pipeline only needs the alignment.
    alignment_group, _, _ = run_pipeline(
            'test_alignment', ref_genome, [sample],
            perform_variant_calling=False, alignment_options={})

    # Get resulting ExperimentSampleToAlignment.
    sample_align = ExperimentSampleToAlignment.objects.get(
            alignment_group=alignment_group, experiment_sample=sample)

    # Run the assembly pipeline and block on the result.
    async_result = run_de_novo_assembly_pipeline([sample_align])
    async_result.get()

    # Retrieve contigs.
    return Contig.objects.filter(
            parent_reference_genome=ref_genome,
            experiment_sample_to_alignment=sample_align)
def test_run_pipeline__multiple_chromosomes(self):
    """Variant calling should work on a reference genome that contains
    more than one chromosome.
    """
    ref_genome = import_reference_genome_from_local_file(
            self.project, 'concat_mg1655_partials',
            FullVCFTestSet.TEST_CONCAT_GENBANK, 'genbank')

    # One sample carrying paired-end reads.
    sample_obj = ExperimentSample.objects.create(
            project=self.project, label='Sample 0')
    copy_and_add_dataset_source(sample_obj, Dataset.TYPE.FASTQ1,
            Dataset.TYPE.FASTQ1, FullVCFTestSet.FASTQ1[0])
    copy_and_add_dataset_source(sample_obj, Dataset.TYPE.FASTQ2,
            Dataset.TYPE.FASTQ2, FullVCFTestSet.FASTQ2[0])

    # Run the full pipeline and wait on both async phases.
    alignment_group, alignment_async_result, \
            variant_calling_async_result = run_pipeline(
                    'name_placeholder', ref_genome, [sample_obj])
    alignment_async_result.get()
    variant_calling_async_result.get()

    alignment_group = AlignmentGroup.objects.get(uid=alignment_group.uid)
    self.assertEqual(AlignmentGroup.STATUS.COMPLETED,
            alignment_group.status)

    # Validate that all variants were called with the expected snpEff
    # gene annotations.
    # TODO: Add Chromosome checking.
    for position, expected_gene in ((515, 'ygiB'), (205, 'tolC')):
        variant = Variant.objects.get(
                reference_genome=alignment_group.reference_genome,
                position=position)
        alt = variant.variantalternate_set.all()[0]
        self.assertEqual(expected_gene, alt.data['INFO_EFF_GENE'])
def test_parser__sv_lumpy(self):
    """Exercise parsing of a Lumpy-produced vcf that contains SV data."""
    deletion_dir = os.path.join(
            TEST_DATA_DIR, 'sv_testing', 'deletion_bd5a1123')
    deletion_ref_fasta = os.path.join(deletion_dir, 'small_ref.fa')
    # Sample uids appearing as columns in the test vcf.
    sample_uids = ['ds1', 'ds2', 'ds3', 'f8346a99']

    reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', deletion_ref_fasta, 'fasta')
    alignment_group = AlignmentGroup.objects.create(
            label='Alignment 1', reference_genome=reference_genome,
            aligner=AlignmentGroup.ALIGNER.BWA)

    # Attach the lumpy vcf as a Dataset on the alignment group.
    lumpy_vcf_dataset = copy_and_add_dataset_source(
            alignment_group, Dataset.TYPE.VCF_LUMPY,
            Dataset.TYPE.VCF_LUMPY, LUMPY_4_SAMPLES_2_DELETIONS_VCF)

    # One sample/alignment per sample id present in the vcf.
    for uid in sample_uids:
        create_sample_and_alignment(self.project, alignment_group, uid)

    # Parse, then verify both expected SV variants were created.
    parse_vcf(lumpy_vcf_dataset, alignment_group)
    for position in (4998, 9999):
        variant = Variant.objects.get(
                reference_genome=reference_genome, position=position)
        common_data = variant.variantcallercommondata_set.all()[0]
        self.assertTrue(common_data.data['IS_SV'])
def main():
    """Bootstrap an example project: the mg1655 genome, a pre-computed
    snpEff-annotated vcf, and the sample models the vcf refers to.
    """
    # Create a User and Project.
    user = get_or_create_user()
    test_project = Project.objects.create(
            title=EXAMPLE_PROJECT_NAME, owner=user.get_profile())
    ref_genome = import_reference_genome_from_local_file(
            test_project, 'mg1655', MG1655_REF_GENOME, 'genbank',
            move=False)

    # Create an alignment group and relate the vcf Dataset to it.
    alignment_group = AlignmentGroup.objects.create(
            label='Fix Recoli Alignment', reference_genome=ref_genome,
            aligner=AlignmentGroup.ALIGNER.BWA)
    vcf_output_path = get_snpeff_vcf_output_path(
            alignment_group, Dataset.TYPE.BWA_ALIGN)
    shutil.copy(LARGE_VCF, vcf_output_path)
    dataset = Dataset.objects.create(
            type=Dataset.TYPE.VCF_FREEBAYES_SNPEFF,
            label=Dataset.TYPE.VCF_FREEBAYES_SNPEFF,
            filesystem_location=clean_filesystem_location(vcf_output_path))
    alignment_group.dataset_set.add(dataset)

    # Import ExperimentSample objects, setting specific uids to match
    # the vcf file.
    with open(EXPERIMENT_SAMPLE_MODEL_DATA_PICKLE) as sample_data_fh:
        sample_records = pickle.load(sample_data_fh)
    for record in sample_records:
        sample_obj = ExperimentSample.objects.create(
                uid=record.uid, project=test_project, label=record.label)
        sample_obj.data.update({
                'group': record.group,
                'well': record.well,
                'num_reads': record.num_reads,
        })
        sample_obj.save()

    parse_alignment_group_vcf(
            alignment_group, Dataset.TYPE.VCF_FREEBAYES_SNPEFF)
def test_run_lumpy__inversion(self):
    """Verify Lumpy finds a single ~1kb inversion near position 30000."""
    # Reference plus a pre-completed alignment: only variant calling is
    # under test here.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', INVERSION_REF, 'fasta')
    self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment',
            reference_genome=self.reference_genome)
    sample_alignment = _create_sample_and_alignment(
            self.project, self.alignment_group, INVERSION_SAMPLE_UID,
            INVERSION_SAMPLE_BWA)['sample_alignment']

    # Configure and run lumpy for just this sample alignment.
    tool_params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])
    tool_params['tool_kwargs'] = {
            'region_num': sample_alignment.uid,
            'sample_alignments': [sample_alignment]
    }
    find_variants_with_tool(
            self.alignment_group, tool_params, project=self.project)
    merge_lumpy_vcf(self.alignment_group)

    # Expect exactly one variant: position ~30000, span ~1000.
    variants = Variant.objects.filter(
            reference_genome=self.reference_genome)
    self.assertEqual(1, len(variants))
    variant = variants[0]
    self.assertAlmostEqual(variant.position, 30000, delta=2)
    common_data = variant.variantcallercommondata_set.all()[0]
    span = common_data.data['INFO_END'] - variant.position
    self.assertAlmostEqual(span, 1000, delta=10)
def main():
    """Populate an example project with the mg1655 reference, a large
    pre-built snpEff-annotated vcf, and matching ExperimentSample models,
    then parse the vcf.
    """
    # User / project scaffolding.
    user = get_or_create_user()
    test_project = Project.objects.create(
            title=EXAMPLE_PROJECT_NAME, owner=user.get_profile())
    ref_genome = import_reference_genome_from_local_file(
            test_project, 'mg1655', MG1655_REF_GENOME, 'genbank',
            move=False)

    # Alignment group that will own the vcf Dataset.
    alignment_group = AlignmentGroup.objects.create(
            label='Fix Recoli Alignment', reference_genome=ref_genome,
            aligner=AlignmentGroup.ALIGNER.BWA)
    vcf_output_path = get_snpeff_vcf_output_path(
            alignment_group, Dataset.TYPE.BWA_ALIGN)
    shutil.copy(LARGE_VCF, vcf_output_path)
    vcf_dataset = Dataset.objects.create(
            type=Dataset.TYPE.VCF_FREEBAYES_SNPEFF,
            label=Dataset.TYPE.VCF_FREEBAYES_SNPEFF,
            filesystem_location=clean_filesystem_location(vcf_output_path))
    alignment_group.dataset_set.add(vcf_dataset)

    # Import ExperimentSample objects, setting specific uids to match
    # the columns of the vcf file.
    with open(EXPERIMENT_SAMPLE_MODEL_DATA_PICKLE) as sample_data_fh:
        sample_records = pickle.load(sample_data_fh)
    for record in sample_records:
        sample_obj = ExperimentSample.objects.create(
                uid=record.uid,
                project=test_project,
                label=record.label)
        sample_obj.data.update({
                'group': record.group,
                'well': record.well,
                'num_reads': record.num_reads})
        sample_obj.save()

    parse_alignment_group_vcf(
            alignment_group, Dataset.TYPE.VCF_FREEBAYES_SNPEFF)
def test_run_lumpy__inversion(self):
    """Lumpy should report a single inversion of size ~1000 at ~30000."""
    # Import reference data and create an alignment that's already
    # complete, so the test focuses on variant calling only.
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', INVERSION_REF, 'fasta')
    self.alignment_group = AlignmentGroup.objects.create(
            label='test alignment',
            reference_genome=self.reference_genome)
    result = _create_sample_and_alignment(
            self.project, self.alignment_group, INVERSION_SAMPLE_UID,
            INVERSION_SAMPLE_BWA)
    sample_alignment = result['sample_alignment']

    # Run lumpy on this one sample alignment.
    params = dict(VARIANT_TOOL_PARAMS_MAP[TOOL_LUMPY])
    params['tool_kwargs'] = {
            'region_num': sample_alignment.uid,
            'sample_alignments': [sample_alignment]
    }
    find_variants_with_tool(
            self.alignment_group, params, project=self.project)
    merge_lumpy_vcf(self.alignment_group)

    # Exactly one variant, near 30000, spanning ~1000 bases.
    variants = Variant.objects.filter(
            reference_genome=self.reference_genome)
    self.assertEqual(1, len(variants))
    only_variant = variants[0]
    self.assertAlmostEqual(only_variant.position, 30000, delta=2)
    vccd = only_variant.variantcallercommondata_set.all()[0]
    self.assertAlmostEqual(
            vccd.data['INFO_END'] - only_variant.position, 1000, delta=10)
def sv_testing_bootstrap(project):
    """Set up an SV-testing reference genome and a paired-read sample;
    run the alignment pipeline only when '--sv' is on the command line.
    """
    data_dir = os.path.join(GD_ROOT, 'test_data', 'sv_testing', 'all_svs')
    ref_genome = import_reference_genome_from_local_file(
            project, 'ref', os.path.join(data_dir, 'ref.fa'), 'fasta')

    sample = ExperimentSample.objects.create(
            project=project, label='simLibrary')
    # Attach both fastq read files to the sample.
    for dataset_type, filename in (
            (Dataset.TYPE.FASTQ1, 'simLibrary.1.fq'),
            (Dataset.TYPE.FASTQ2, 'simLibrary.2.fq')):
        copy_and_add_dataset_source(sample, dataset_type, dataset_type,
                os.path.join(data_dir, filename))

    # Using the --sv argument opts in to running the pipeline for the SV
    # project.
    if '--sv' in sys.argv:
        run_pipeline('sample_alignment_ref', ref_genome, [sample])
def test_dataset_strings(self):
    """internal_string() should combine the owner's uid with the
    upper-underscored dataset type.
    """
    user = User.objects.create_user(
            TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)
    self.test_project = Project.objects.create(
            title=TEST_PROJECT_NAME, owner=user.get_profile())
    self.test_ref_genome = import_reference_genome_from_local_file(
            self.test_project, TEST_REF_GENOME_NAME,
            TEST_REF_GENOME_PATH, 'genbank')

    genbank_dataset = get_dataset_with_type(
            self.test_ref_genome,
            type=Dataset.TYPE.REFERENCE_GENOME_GENBANK)
    expected = (str(self.test_ref_genome.uid) + '_' +
            uppercase_underscore(Dataset.TYPE.REFERENCE_GENOME_GENBANK))
    self.assertEquals(
            genbank_dataset.internal_string(self.test_ref_genome),
            expected)
def _generate_test_instance(self, rg_files, rg_names=None):
    """Import the given genome files, concatenate them through the view,
    and assert the result's chromosome and base counts add up.

    Args:
        rg_files: Paths to genome files ('.fa' => fasta, else genbank).
        rg_names: Optional labels; defaults to stringified indices.
    """
    if rg_names is None:
        rg_names = [str(i) for i in range(len(rg_files))]
    project = self.common_entities['project']

    # Import each file, inferring the format from its extension.
    ref_genomes = []
    for i, path in enumerate(rg_files):
        fmt = 'fasta' if path.endswith('.fa') else 'genbank'
        ref_genomes.append(import_reference_genome_from_local_file(
                project, rg_names[i], path, fmt, move=False))

    # Simulate an authenticated POST to the concatenate endpoint.
    test_label = 'concat_test'
    request = HttpRequest()
    request.POST = {'data': json.dumps({
            'newGenomeLabel': test_label,
            'refGenomeUidList': [rg.uid for rg in ref_genomes]})}
    request.method = 'POST'
    request.user = self.common_entities['user']
    authenticate(username=TEST_USERNAME, password=TEST_PASSWORD)
    self.assertTrue(request.user.is_authenticated())
    ref_genomes_concatenate(request)

    concat_ref = ReferenceGenome.objects.get(label=test_label)
    # Chromosome and base totals must equal the sums over the inputs.
    self.assertEqual(concat_ref.num_chromosomes,
            sum([rg.num_chromosomes for rg in ref_genomes]))
    self.assertEqual(concat_ref.num_bases,
            sum([rg.num_bases for rg in ref_genomes]))
def _generate_test_instance(self, rg_files, rg_names=None):
    """Import genomes, concatenate them via the view, verify the totals,
    then delete the concatenated genome.

    Args:
        rg_files: Paths to genome files ('.fa' => fasta, else genbank).
        rg_names: Optional labels for the imported genomes; defaults to
            stringified indices.
    """
    # BUGFIX: 'self' was missing from the signature even though the body
    # uses self.common_entities / self.assertEqual, so calling this as a
    # bound method shifted every argument by one.
    if rg_names is None:  # idiom: identity check, not '== None'
        rg_names = [str(i) for i in range(len(rg_files))]
    project = self.common_entities['project']

    # Import each file, inferring the format from its extension.
    ref_genomes = []
    for i, rg_file in enumerate(rg_files):
        file_type = 'fasta' if rg_file.endswith('.fa') else 'genbank'
        ref_genomes.append(import_reference_genome_from_local_file(
                project, rg_names[i], rg_file, file_type, move=False))

    # Simulate an authenticated POST to the concatenate view.
    test_label = 'concat_test'
    request_data = {
        'newGenomeLabel': test_label,
        'refGenomeUidList': [rg.uid for rg in ref_genomes]
    }
    request = HttpRequest()
    request.POST = {'data': json.dumps(request_data)}
    request.method = 'POST'
    request.user = self.common_entities['user']
    authenticate(username=TEST_USERNAME, password=TEST_PASSWORD)
    self.assertTrue(request.user.is_authenticated())
    ref_genomes_concatenate(request)

    concat_ref = ReferenceGenome.objects.get(label=test_label)

    # Assert correct number of chromosomes.
    self.assertEqual(concat_ref.num_chromosomes,
            sum([rg.num_chromosomes for rg in ref_genomes]))

    # Assert correct number of bases.
    self.assertEqual(concat_ref.num_bases,
            sum([rg.num_bases for rg in ref_genomes]))

    # Delete the concatenated Reference Genome so repeated calls don't
    # collide on the label.
    concat_ref.delete()
def test_dataset_strings(self):
    """The genbank dataset's internal_string() is '<uid>_<TYPE>'."""
    user = User.objects.create_user(
            TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)
    self.test_project = Project.objects.create(
            title=TEST_PROJECT_NAME, owner=user.get_profile())
    self.test_ref_genome = import_reference_genome_from_local_file(
            self.test_project, TEST_REF_GENOME_NAME,
            TEST_REF_GENOME_PATH, 'genbank')

    dataset = get_dataset_with_type(
            self.test_ref_genome,
            type=Dataset.TYPE.REFERENCE_GENOME_GENBANK)
    uid_prefix = str(self.test_ref_genome.uid)
    type_suffix = uppercase_underscore(
            Dataset.TYPE.REFERENCE_GENOME_GENBANK)
    self.assertEquals(
            dataset.internal_string(self.test_ref_genome),
            uid_prefix + '_' + type_suffix)
def test_run_pipeline__snps_with_effect__no_svs(self):
    """Pipeline run where SNPs with effects are called but no SVs."""
    ref_genome = import_reference_genome_from_local_file(
            self.project, 'mg1655_tolC_through_zupT',
            FullVCFTestSet.TEST_GENBANK, 'genbank')

    # One sample carrying paired-end reads.
    sample_obj = ExperimentSample.objects.create(
            project=self.project, label='Sample %d' % 0)
    copy_and_add_dataset_source(sample_obj, Dataset.TYPE.FASTQ1,
            Dataset.TYPE.FASTQ1, FullVCFTestSet.FASTQ1[0])
    copy_and_add_dataset_source(sample_obj, Dataset.TYPE.FASTQ2,
            Dataset.TYPE.FASTQ2, FullVCFTestSet.FASTQ2[0])

    # Run the pipeline and wait on both async phases.
    alignment_group, align_async, variants_async = run_pipeline(
            'test_align', ref_genome, [sample_obj])
    align_async.get()
    variants_async.get()

    alignment_group = AlignmentGroup.objects.get(uid=alignment_group.uid)
    self.assertEqual(AlignmentGroup.STATUS.COMPLETED,
            alignment_group.status)

    # Check that SnpEff annotated the variant at 205 with the tolC gene.
    v_205 = Variant.objects.get(
            reference_genome=alignment_group.reference_genome,
            position=205)
    v_205_alt = v_205.variantalternate_set.all()[0]
    self.assertEqual('tolC', v_205_alt.data['INFO_EFF_GENE'])
def setUp(self):
    """Override.

    Builds a project with one hand-constructed reference genome (plus a
    chromosome) and one genome imported from a genbank file.
    """
    user = User.objects.create_user(
            TEST_USERNAME, password=TEST_PASSWORD, email=TEST_EMAIL)
    self.test_project = Project.objects.create(
            title=TEST_PROJECT_NAME, owner=user.get_profile())

    # Bare reference genome with a single 9001-base chromosome.
    self.test_ref_genome = ReferenceGenome.objects.create(
            project=self.test_project, label='boom')
    self.test_chromosome = Chromosome.objects.create(
            reference_genome=self.test_ref_genome, label='Chromosome',
            num_bases=9001)

    # Genome imported from an on-disk genbank file.
    self.test_ext_ref_genome = import_reference_genome_from_local_file(
            self.test_project, TEST_REF_GENOME_NAME,
            TEST_REF_GENOME_PATH, 'genbank')
def setUp(self):
    """Create the shared test entities and import a fasta reference."""
    self.common_data = create_common_entities()
    self.project = self.common_data['project']
    self.reference_genome = import_reference_genome_from_local_file(
            self.project, 'ref_genome', TEST_FASTA, 'fasta')
def test_run_lumpy(self): TEST_SAMPLE_UID = '8c57e7b9' # Create a ref genome. self.reference_genome = import_reference_genome_from_local_file( self.project, 'ref_genome', TEST_FASTA, 'fasta') # Create a sample. self.experiment_sample = ExperimentSample.objects.create( uid=TEST_SAMPLE_UID, project=self.project, label='sample1') # Create a new alignment group. alignment_group = AlignmentGroup.objects.create( label='test alignment', reference_genome=self.reference_genome) self.alignment_group = alignment_group # Create the expected models. sample_alignment = ExperimentSampleToAlignment.objects.create( alignment_group=alignment_group, experiment_sample=self.experiment_sample) bwa_dataset = Dataset.objects.create(label=Dataset.TYPE.BWA_ALIGN, type=Dataset.TYPE.BWA_ALIGN, status=Dataset.STATUS.READY) bwa_dataset.filesystem_location = clean_filesystem_location( TEST_DISC_SPLIT_BAM) bwa_dataset.save() sample_alignment.dataset_set.add(bwa_dataset) sample_alignment.save() self.bwa_dataset = bwa_dataset self.sample_alignment = sample_alignment fasta_ref = get_dataset_with_type( self.reference_genome, Dataset.TYPE.REFERENCE_GENOME_FASTA).get_absolute_location() sample_alignments = [self.sample_alignment] vcf_output_dir = self.alignment_group.get_model_data_dir() vcf_output_filename = os.path.join(vcf_output_dir, 'lumpy.vcf') alignment_type = 'BWA_ALIGN' # NOTE: Running these functions but not checking results. get_discordant_read_pairs(self.sample_alignment) get_split_reads(self.sample_alignment) run_lumpy(fasta_ref, sample_alignments, vcf_output_dir, vcf_output_filename, alignment_type) dataset = Dataset.objects.create( type=Dataset.TYPE.VCF_LUMPY, label=Dataset.TYPE.VCF_LUMPY, filesystem_location=vcf_output_filename, ) self.alignment_group.dataset_set.add(dataset) # Parse the resulting vcf, grab variant objects parse_alignment_group_vcf(self.alignment_group, Dataset.TYPE.VCF_LUMPY) # Grab the resulting variants. 
variants = Variant.objects.filter( reference_genome=self.reference_genome) # There should be a Variant object for each sv event. self.assertEqual(2, len(variants)) # One event should be located very close to 25k va_positions = [v.position for v in variants] va_offset = [25000 - va_pos for va_pos in va_positions] self.assertTrue(any([v < 50 for v in va_offset]))