def _create_all_entities(self):
    """
    Create 2 studies with genomic files and sequencing experiments

    Every study gets one participant, three biospecimens with one genomic
    file each, and two sequencing experiments. Both experiments are linked
    to the study's first genomic file; the first experiment also gets the
    second file and the second experiment also gets the last file.

    Returns a tuple (ses, gfs, studies) where ses and gfs are dicts keyed
    by 'study<index>' and studies is the list of Study objects.
    """
    center = SequencingCenter(name='sc')
    studies = []
    ses = {}
    gfs = {}
    for study_idx in range(2):
        study_key = 'study{}'.format(study_idx)
        study = Study(external_id='s{}'.format(study_idx))
        participant = Participant(external_id='p{}'.format(study_idx))
        study.participants.append(participant)

        # One genomic file per biospecimen
        study_files = gfs.setdefault(study_key, [])
        for file_idx in range(3):
            biospecimen = Biospecimen(
                external_sample_id='b{}'.format(file_idx),
                analyte_type='DNA',
                sequencing_center=center,
                participant=participant)
            genomic_file = GenomicFile(
                external_id='study{}-gf{}'.format(study_idx, file_idx),
                urls=['s3://mybucket/key'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            study_files.append(genomic_file)
            biospecimen.genomic_files.append(genomic_file)

        study_experiments = ses.setdefault(study_key, [])

        # Attributes shared by both experiments of this study
        now_utc = datetime.now().replace(tzinfo=tz.tzutc())
        shared = {
            'experiment_date': str(now_utc),
            'experiment_strategy': 'WXS',
            'library_name': 'Test_library_name_1',
            'library_strand': 'Unstranded',
            'is_paired_end': False,
            'platform': 'Illumina',
            'instrument_model': '454 GS FLX Titanium',
            'max_insert_size': 600,
            'mean_insert_size': 500,
            'mean_depth': 40,
            'total_reads': 800,
            'mean_read_length': 200
        }

        # First experiment: first two files
        first = SequencingExperiment(
            **shared,
            sequencing_center=center,
            external_id='{}-se0'.format(study_key))
        first.genomic_files.extend(study_files[:2])

        # Second experiment: first and last file
        second = SequencingExperiment(
            **shared,
            sequencing_center=center,
            external_id='{}-se1'.format(study_key))
        second.genomic_files.extend([study_files[0], study_files[-1]])

        study_experiments.extend([first, second])
        studies.append(study)

    db.session.add_all(studies)
    db.session.commit()

    return ses, gfs, studies
def _create_all_entities():
    """
    Create 2 studies with genomic files, read groups and sequencing
    experiments

    Every study gets one participant and three biospecimens with one
    genomic file each. Two read groups and two sequencing experiments are
    created per study; the first of each is linked to the first two files,
    the second of each to the first and last file.

    Returns a tuple (ses, rgs, gfs, studies) where ses, rgs and gfs are
    dicts keyed by 'study<index>' and studies is the list of Study objects.
    """
    center = SequencingCenter(name='sc')
    studies = []
    ses = {}
    rgs = {}
    gfs = {}
    for study_idx in range(2):
        study_key = 'study{}'.format(study_idx)
        study = Study(external_id='s{}'.format(study_idx))
        participant = Participant(external_id='p{}'.format(study_idx))
        study.participants.append(participant)

        # One genomic file per biospecimen
        study_files = gfs.setdefault(study_key, [])
        for file_idx in range(3):
            biospecimen = Biospecimen(
                external_sample_id='b{}'.format(file_idx),
                analyte_type='DNA',
                sequencing_center=center,
                participant=participant)
            genomic_file = GenomicFile(
                external_id='study{}-gf{}'.format(study_idx, file_idx),
                urls=['s3://mybucket/key',
                      'https://gen3.something.com/did'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            study_files.append(genomic_file)
            biospecimen.genomic_files.append(genomic_file)

        head_pair = study_files[0:2]
        first_and_last = [study_files[0], study_files[-1]]

        # Read groups
        study_read_groups = rgs.setdefault(study_key, [])
        read_group_a = ReadGroup(external_id='{}-rg0'.format(study_key))
        read_group_a.genomic_files.extend(head_pair)
        read_group_b = ReadGroup(external_id='{}-rg1'.format(study_key))
        read_group_b.genomic_files.extend(first_and_last)

        # Sequencing experiments
        study_experiments = ses.setdefault(study_key, [])
        experiment_a = SequencingExperiment(
            external_id='{}-se0'.format(study_key),
            experiment_strategy='WGS',
            is_paired_end=True,
            platform='platform',
            sequencing_center=center)
        experiment_a.genomic_files.extend(head_pair)
        experiment_b = SequencingExperiment(
            external_id='{}-se1'.format(study_key),
            experiment_strategy='WGS',
            is_paired_end=True,
            platform='platform',
            sequencing_center=center)
        experiment_b.genomic_files.extend(first_and_last)

        study_read_groups.extend([read_group_a, read_group_b])
        study_experiments.extend([experiment_a, experiment_b])
        studies.append(study)

    db.session.add_all(studies)
    db.session.commit()

    return ses, rgs, gfs, studies
def _create_entities(self):
    """
    Create a study, a participant, a biospecimen, two sequencing
    experiments and four genomic files, then save them via the participant

    Genomic files with an odd index are appended to the first sequencing
    experiment, the others to the second one.
    """
    # Study and its participant
    study = Study(external_id='phs001')
    participant = Participant(external_id='p1', is_proband=True,
                              study=study)

    # Sequencing center: reuse "Baylor" when it already exists
    center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if center is None:
        center = SequencingCenter(name="Baylor")
        db.session.add(center)
        db.session.commit()

    # Sequencing experiments
    first_exp = SequencingExperiment(**self._make_seq_exp('se1'),
                                     sequencing_center_id=center.kf_id)
    second_exp = SequencingExperiment(**self._make_seq_exp('se2'),
                                      sequencing_center_id=center.kf_id)

    # Biospecimen
    biospecimen = Biospecimen(external_sample_id='bio1',
                              analyte_type='dna',
                              participant_id=participant.kf_id,
                              sequencing_center_id=center.kf_id)

    # Genomic files
    files = []
    for idx in range(4):
        file_fields = {
            'file_name': 'file_{}'.format(idx),
            'data_type': 'submitted aligned read',
            'file_format': '.cram',
            'urls': ['s3://file_{}'.format(idx)],
            'hashes': {'md5': str(uuid.uuid4())},
            'controlled_access': True,
            'is_harmonized': True,
            'reference_genome': 'Test01'
        }
        genomic_file = GenomicFile(
            **file_fields, sequencing_experiment_id=first_exp.kf_id)
        # Odd indices go to the first experiment, even to the second
        owner = first_exp if idx % 2 else second_exp
        owner.genomic_files.append(genomic_file)
        files.append(genomic_file)

    biospecimen.genomic_files = files
    participant.biospecimens = [biospecimen]
    db.session.add(participant)
    db.session.commit()
def _create_save_to_db(self):
    """
    Create and save sequencing_experiment

    A new sequencing center named 'sc' is created alongside the
    experiment. Returns the committed SequencingExperiment.
    """
    center = SequencingCenter(name='sc')
    experiment = SequencingExperiment(
        external_id='blah',
        experiment_strategy='WXS',
        library_name='Test_library_name_1',
        library_strand='Unstranded',
        is_paired_end=False,
        platform='Illumina',
        instrument_model='454 GS FLX Titanium',
        max_insert_size=600,
        mean_insert_size=500,
        mean_depth=40,
        total_reads=800,
        mean_read_length=200,
        sequencing_center=center)
    db.session.add(experiment)
    db.session.commit()
    return experiment
def test_post(self):
    """
    Test create a new sequencing_experiment_genomic_file
    """
    # Persist the genomic file and sequencing experiment to be linked
    genomic_file = GenomicFile(external_id='gf0')
    center = SequencingCenter(name='sc')
    experiment = SequencingExperiment(external_id='se0',
                                      experiment_strategy='WGS',
                                      is_paired_end=True,
                                      platform='platform',
                                      sequencing_center=center)
    db.session.add_all([genomic_file, experiment])
    db.session.commit()

    link_fields = {
        'sequencing_experiment_id': experiment.kf_id,
        'genomic_file_id': genomic_file.kf_id,
        'external_id': 'se0-gf0'
    }

    # Send post request
    response = self.client.post(url_for(SE_GF_LIST_URL),
                                data=json.dumps(link_fields),
                                headers=self._api_headers())

    # Check response status code
    self.assertEqual(response.status_code, 201)

    # Check response content
    body = json.loads(response.data.decode('utf-8'))
    assert body['results']['kf_id']
    self.assertEqual(1, SequencingExperimentGenomicFile.query.count())
def _create_experiments(self, total):
    """
    Creates sequencing experiments

    Builds ``total`` SequencingExperiment objects with randomized
    attributes. Each experiment receives its own batch of genomic files
    from ``self._create_genomic_files``; the batch size is drawn between
    ``self.min_gen_files`` and ``self.max_gen_files``.

    Each experiment is constructed exactly once. Previously a second,
    unused SequencingExperiment was built per iteration from the same data
    and the same genomic files.

    :param total: number of sequencing experiments to create
    :returns: list of SequencingExperiment objects (not added to the
        session here)
    """
    e_list = []
    dt = datetime.now()
    for i in range(total):
        # Random date derived from now: 1-3 years back, then 1-6 months
        # and 1-30 days forward
        experiment_date = (
            dt
            - relativedelta.relativedelta(years=random.randint(1, 3))
            + relativedelta.relativedelta(months=random.randint(1, 6))
            + relativedelta.relativedelta(days=random.randint(1, 30))
        )
        e_data = {
            'external_id': 'sequencing_experiment_{}'.format(i),
            'experiment_date': experiment_date,
            'experiment_strategy': random.choice(
                self.experiment_strategy_list),
            'library_name': 'Test_library_name_{}'.format(i),
            'library_strand': random.choice(self.library_strand_list),
            'is_paired_end': random.choice(self.is_paired_end_list),
            'platform': random.choice(self.platform_list),
            'instrument_model': random.choice(self.instrument_model_list),
            'max_insert_size': random.choice([300, 350, 500]),
            'mean_insert_size': random.randint(300, 500),
            'mean_depth': random.randint(40, 60),
            'total_reads': random.randint(400, 1000),
            'mean_read_length': random.randint(400, 1000)
        }
        genomic_files = self._create_genomic_files(
            random.randint(self.min_gen_files, self.max_gen_files))
        e_list.append(
            SequencingExperiment(**e_data, genomic_files=genomic_files))
    return e_list
def _create_save_to_db(self):
    """
    Make all entities

    Creates two genomic files and two sequencing experiments and links
    every genomic file to every experiment through a
    SequencingExperimentGenomicFile row.

    Returns a tuple (ses, gfs) of the created experiments and files.
    """
    # Sequencing center: reuse "Baylor" when it already exists
    center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if center is None:
        center = SequencingCenter(name="Baylor")
        db.session.add(center)
        db.session.commit()

    # Attributes shared by both sequencing experiments
    shared = {
        'experiment_strategy': 'WXS',
        'library_name': 'library',
        'library_strand': 'Unstranded',
        'is_paired_end': False,
        'platform': 'platform',
        'instrument_model': '454 GS FLX Titanium',
        'max_insert_size': 600,
        'mean_insert_size': 500,
        'mean_depth': 40,
        'total_reads': 800,
        'mean_read_length': 200
    }

    ses = []
    gfs = []
    for idx in range(2):
        gfs.append(GenomicFile(external_id='gf{}'.format(idx)))
        ses.append(SequencingExperiment(
            **shared,
            sequencing_center=center,
            external_id='se{}'.format(idx)))

    # Many to many: one link per (genomic file, experiment) pair
    for gf_idx, genomic_file in enumerate(gfs):
        for se_idx, experiment in enumerate(ses):
            link = SequencingExperimentGenomicFile(
                genomic_file=genomic_file,
                sequencing_experiment=experiment,
                external_id='se{}-gf{}'.format(se_idx, gf_idx))
            db.session.add(link)

    db.session.commit()

    return ses, gfs
def _create_entities(self):
    """
    Create participant with required entities

    Creates (or reuses) the "Baylor" sequencing center, then a study with
    one participant, one biospecimen and one sequencing experiment. Four
    genomic files are attached to both the biospecimen and the sequencing
    experiment; files with an odd index are marked as harmonized. A
    cavatica task built by ``self._create_cavatica_task`` is saved too.
    """
    # Sequencing center
    sc = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if sc is None:
        sc = SequencingCenter(name="Baylor")
        db.session.add(sc)
        db.session.commit()

    # Create study
    study = Study(external_id='phs001')

    # Participants
    p = Participant(external_id='p0', is_proband=True, study=study)

    # Biospecimen
    bs = Biospecimen(analyte_type='dna',
                     sequencing_center=sc,
                     participant=p)

    # SequencingExperiment
    se = SequencingExperiment(external_id='se',
                              experiment_strategy='wgs',
                              is_paired_end=True,
                              platform='platform',
                              sequencing_center=sc)

    # Genomic Files
    for i in range(4):
        gf = GenomicFile(file_name='gf_{}'.format(i),
                         data_type='submitted aligned read',
                         file_format='.cram',
                         urls=['s3://file_{}'.format(i)],
                         hashes={'md5': str(uuid.uuid4())},
                         is_harmonized=bool(i % 2))
        bs.genomic_files.append(gf)
        se.genomic_files.append(gf)

    ct = self._create_cavatica_task('ct1')
    db.session.add(ct)
    db.session.add(study)
    db.session.commit()
def test_foreign_key_constraint(self):
    """
    Test sequencing_experiment cannot be created with out
    sequencing_center
    """
    # Create sequencing experiment without a sequencing center
    se_id = 'Test_SequencingExperiment_0'
    seq_experiment_data = self._make_seq_exp(external_id=se_id)
    e = SequencingExperiment(**seq_experiment_data)

    # The database is only hit when the session is flushed, so the
    # commit - not the add - is what must raise. Passing the result of
    # db.session.add(e) to assertRaises never exercised the constraint.
    db.session.add(e)
    with self.assertRaises(IntegrityError):
        db.session.commit()

    # Leave the session usable after the failed commit
    db.session.rollback()
def test_not_null_constraint(self):
    """
    Test sequencing_experiment cannot be created with out
    sequencing_center
    """
    # Create sequencing experiment without a sequencing center
    se_id = 'Test_SequencingExperiment_0'
    seq_experiment_data = self._make_seq_exp(external_id=se_id)
    e = SequencingExperiment(**seq_experiment_data)

    # The database is only hit when the session is flushed, so the
    # commit - not the add - is what must raise. Passing the result of
    # db.session.add(e) to assertRaises never exercised the constraint.
    db.session.add(e)
    with self.assertRaises(IntegrityError):
        db.session.commit()

    # Leave the session usable after the failed commit
    db.session.rollback()
def _create_experiments(self, total=1, sequencing_center_id=None):
    """
    Create sequencing experiments

    Saves a single sequencing experiment with external id 'se1' and
    returns it. The ``total`` argument is accepted but not used here.
    """
    experiment = SequencingExperiment(
        external_id='se1',
        experiment_strategy='wgs',
        is_paired_end=True,
        platform='platform',
        sequencing_center_id=sequencing_center_id)
    db.session.add(experiment)
    db.session.commit()
    return experiment
def _create_save_to_db(self):
    """
    Create and save sequencing_experiment

    The experiment is attached to a new "Baylor" sequencing center and
    saved through it. Returns the experiment's input fields extended with
    the 'kf_id' and 'sequencing_center_id' values read back after commit.
    """
    center = SequencingCenter(name="Baylor")
    fields = self._make_seq_exp(external_id='se')
    experiment = SequencingExperiment(**fields,
                                      sequencing_center_id=center.kf_id)
    center.sequencing_experiments.append(experiment)

    # Saving the center saves the experiment attached to it
    db.session.add(center)
    db.session.commit()

    fields['kf_id'] = experiment.kf_id
    fields['sequencing_center_id'] = center.kf_id
    return fields
def create_seqexp(self):
    """
    Create a sequencing center and a sequencing experiment and save both
    to the db

    Returns a dict with a single 'sequencing_experiment_id' key.
    """
    external_id = "Test_SequencingExperiment_0"
    center = SequencingCenter(name="Baylor")
    fields = self._make_seq_exp(external_id=external_id)
    experiment = SequencingExperiment(**fields,
                                      sequencing_center_id=center.kf_id)
    center.sequencing_experiments.append(experiment)
    db.session.add(center)
    db.session.commit()

    # NOTE(review): the value returned is the external id string, not the
    # experiment's kf_id - confirm that callers expect this
    return {'sequencing_experiment_id': external_id}
def _create_experiment(self, _id, genomic_files=None,
                       sequencing_center_id=None):
    """
    Create sequencing experiment

    Saves a sequencing experiment whose external id is ``_id``, linked to
    ``genomic_files`` (an empty list when none are given), and returns it.
    """
    experiment = SequencingExperiment(
        external_id=_id,
        experiment_strategy='wgs',
        is_paired_end=True,
        platform='platform',
        genomic_files=genomic_files or [],
        sequencing_center_id=sequencing_center_id)
    db.session.add(experiment)
    db.session.commit()
    return experiment
def create_seqexp(self):
    """
    Create a sequencing center and a sequencing experiment and save both
    to the db

    The "Baylor" sequencing center is reused when it already exists.

    :returns: dict with the kf_ids of the saved entities under the keys
        'sequencing_experiment_id' and 'sequencing_center_id'
    """
    sc = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if sc is None:
        sc = SequencingCenter(name="Baylor")

    # is_paired_end is given as a real boolean, like every other place
    # that builds a SequencingExperiment; the string "True" is not a
    # boolean value
    se = SequencingExperiment(external_id="Test_seq_ex_o",
                              experiment_strategy="WGS",
                              is_paired_end=True,
                              platform="Test_platform",
                              sequencing_center_id=sc.kf_id)
    sc.sequencing_experiments.append(se)

    # Saving the center saves the experiment attached to it
    db.session.add(sc)
    db.session.commit()

    ids = {
        'sequencing_experiment_id': se.kf_id,
        'sequencing_center_id': sc.kf_id
    }
    return ids
def test_create_and_find_sequencing_experiment(self):
    """
    Test creation of sequencing_experiment
    """
    started_at = datetime.now()

    # Sequencing center that owns the experiment
    center = SequencingCenter(name="Baylor")

    # Sequencing experiment, saved through the center
    expected = self._make_seq_exp(
        external_id='Test_SequencingExperiment_0')
    experiment = SequencingExperiment(**expected,
                                      sequencing_center_id=center.kf_id)
    center.sequencing_experiments.append(experiment)
    db.session.add(center)
    db.session.commit()

    # Exactly one experiment is stored and it carries the input values
    self.assertEqual(SequencingExperiment.query.count(), 1)
    stored = SequencingExperiment.query.one()
    for field in expected:
        self.assertEqual(expected[field], getattr(stored, field))

    # Timestamps are set after the test started; uuid parses as a UUID
    self.assertGreater(stored.created_at, started_at)
    self.assertGreater(stored.modified_at, started_at)
    self.assertIs(type(uuid.UUID(stored.uuid)), uuid.UUID)
def create_seqexp_seqcen(self):
    """
    Create a sequencing center (reusing "Baylor" when it exists) and a
    sequencing experiment that references it, and save both to the db

    Returns a dict with the sequencing center kf_id under
    'sequencing_center_id'.
    """
    center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if center is None:
        center = SequencingCenter(external_id='SC_0', name="Baylor")
        db.session.add(center)
        db.session.commit()

    experiment = SequencingExperiment(
        external_id='Seq_0',
        experiment_strategy='WXS',
        library_name='Test_library_name_1',
        library_strand='Unstranded',
        is_paired_end=False,
        platform='Test_platform_name_1',
        sequencing_center_id=center.kf_id)
    db.session.add(experiment)
    db.session.commit()

    return {'sequencing_center_id': center.kf_id}
def participants(client):
    """
    Populate the database with enough rows of each entity for pagination

    Creates 101 studies, 101 cavatica apps, 101 study files (all on
    'Study_0'), 102 investigators (each linked to 'Study_0' and
    'Study_1'), 101 families and 102 participants. The first 50
    participants belong to 'Family_0'/'Study_0', the rest to
    'Family_1'/'Study_1'. Every participant gets a diagnosis, outcome,
    phenotype, biospecimen, sequencing experiment, genomic file, read
    group and cavatica task. Family relationships are then created for
    the pairs yielded by ``iterate_pairwise`` and everything is committed.

    ``client`` is not used in the body.
    """
    # Add a bunch of studies for pagination
    for i in range(101):
        s = Study(external_id='Study_{}'.format(i))
        db.session.add(s)

    # Add a bunch of cavatica apps; `ca` stays bound to the last one
    for i in range(101):
        ca = CavaticaApp(name='app', revision=0)
        db.session.add(ca)

    # Add a bunch of study files
    s0 = Study.query.filter_by(external_id='Study_0').one()
    s1 = Study.query.filter_by(external_id='Study_1').one()
    for i in range(101):
        sf = StudyFile(file_name='blah', study_id=s0.kf_id)
        db.session.add(sf)

    # Add a bunch of investigators
    for _ in range(102):
        inv = Investigator(name='test')
        inv.studies.extend([s0, s1])
        db.session.add(inv)

    # Add a bunch of families
    families = []
    for i in range(101):
        families.append(Family(external_id='Family_{}'.format(i)))
    db.session.add_all(families)
    db.session.flush()

    # Add a bunch of participants, each with its related entities
    participants = []
    f0 = Family.query.filter_by(external_id='Family_0').one()
    f1 = Family.query.filter_by(external_id='Family_1').one()
    seq_cen = None
    for i in range(102):
        # First 50 participants go to family/study 0, the rest to 1
        f = f0 if i < 50 else f1
        s = s0 if i < 50 else s1
        data = {
            'external_id': "test",
            'is_proband': True,
            'race': 'Asian',
            'ethnicity': 'Hispanic or Latino',
            'diagnosis_category': 'Cancer',
            'gender': 'Male'
        }
        p = Participant(**data, study_id=s.kf_id, family_id=f.kf_id)
        diag = Diagnosis()
        p.diagnoses = [diag]
        outcome = Outcome()
        p.outcomes = [outcome]
        phen = Phenotype()
        p.phenotypes = [phen]
        participants.append(p)
        db.session.add(p)
        db.session.flush()

        seq_data = {
            'external_id': 'Seq_0',
            'experiment_strategy': 'WXS',
            'library_name': 'Test_library_name_1',
            'library_strand': 'Unstranded',
            'is_paired_end': False,
            'platform': 'Test_platform_name_1'
        }
        gf_kwargs = {
            'external_id': 'gf_0',
            'file_name': 'hg38.fq',
            'data_type': 'Aligned Reads',
            'file_format': 'fastq',
            'size': 1000,
            'urls': ['s3://bucket/key'],
            'hashes': {
                'md5': str(uuid.uuid4())
            },
            'controlled_access': False
        }
        # Reuse the "Baylor" sequencing center once it has been created
        seq_cen = SequencingCenter.query.filter_by(name="Baylor")\
            .one_or_none()
        if seq_cen is None:
            seq_cen = SequencingCenter(external_id='SC_0', name="Baylor")
            db.session.add(seq_cen)
            db.session.flush()
        seq_exp = SequencingExperiment(**seq_data,
                                       sequencing_center_id=seq_cen.kf_id)
        db.session.add(seq_exp)
        samp = Biospecimen(analyte_type='an analyte',
                           sequencing_center_id=seq_cen.kf_id,
                           participant=p)
        db.session.add(samp)
        p.biospecimens = [samp]

        gf = GenomicFile(**gf_kwargs,
                         sequencing_experiment_id=seq_exp.kf_id)
        db.session.add(gf)
        samp.genomic_files.append(gf)
        samp.diagnoses.append(diag)
        db.session.flush()

        rg = ReadGroup(lane_number=4, flow_cell='FL0123')
        rg.genomic_files.append(gf)
        ct = CavaticaTask(name='task_{}'.format(i))
        ct.genomic_files.append(gf)
        # `ca` is the last CavaticaApp built by the loop near the top, so
        # every task is attached to that same app
        ca.cavatica_tasks.append(ct)

    # Family relationships
    for participant1, participant2 in iterate_pairwise(participants):
        gender = participant1.gender
        rel = 'mother'
        # NOTE(review): participants above are created with gender 'Male'
        # while this compares against 'male'; unless the model normalizes
        # case the relation stays 'mother' - confirm intent
        if gender == 'male':
            rel = 'father'
        r = FamilyRelationship(participant1=participant1,
                               participant2=participant2,
                               participant1_to_participant2_relation=rel)
        db.session.add(r)

    db.session.commit()
def _create_save_to_db(self):
    """
    Create and save biospecimen

    Requires creating a participant
    Create a biospecimen and add it to participant as kwarg
    Save participant

    A study, the "Baylor" sequencing center (reused when it already
    exists) and a sequencing experiment are saved first.

    :returns: dict of the biospecimen's input fields extended with
        'participant_id' and the biospecimen's 'kf_id'
    """
    dt = datetime.now()
    study = Study(external_id='phs001')
    db.session.add(study)
    db.session.commit()

    sc = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
    if sc is None:
        sc = SequencingCenter(name="Baylor")
        db.session.add(sc)
        db.session.commit()

    # is_paired_end is given as a real boolean, like every other place
    # that builds a SequencingExperiment; the string "True" is not a
    # boolean value
    se = SequencingExperiment(external_id="Test_seq_ex_o",
                              experiment_strategy="WGS",
                              is_paired_end=True,
                              platform="Test_platform",
                              sequencing_center_id=sc.kf_id)
    db.session.add(se)
    db.session.commit()

    # Create biospecimen
    kwargs = {
        'external_sample_id': 's1',
        'external_aliquot_id': 'a1',
        'source_text_tissue_type': 'Normal',
        'composition': 'composition1',
        'source_text_anatomical_site': 'Brain',
        'age_at_event_days': 365,
        'source_text_tumor_descriptor': 'Metastatic',
        'shipment_origin': 'CORIELL',
        'analyte_type': 'DNA',
        'concentration_mg_per_ml': 100,
        'volume_ul': 12.67,
        'shipment_date': dt,
        'spatial_descriptor': 'left side',
        'ncit_id_tissue_type': 'Test',
        'ncit_id_anatomical_site': 'C12439',
        'uberon_id_anatomical_site': 'UBERON:0000955',
        'consent_type': 'GRU-IRB',
        'dbgap_consent_code': 'phs00000.c1',
        'sequencing_center_id': sc.kf_id
    }
    d = Biospecimen(**kwargs)

    # Create and save participant with biospecimen
    p = Participant(external_id='Test subject 0',
                    biospecimens=[d],
                    is_proband=True,
                    study_id=study.kf_id)
    db.session.add(p)
    db.session.commit()

    # Ids only known after the commit are added to the returned fields
    kwargs['participant_id'] = p.kf_id
    kwargs['kf_id'] = d.kf_id

    return kwargs