def test_create_and_find(self):
        """
        Test that genomic files can be created and found again

        Two genomic files are persisted, then each one is re-queried and
        every input property is compared with the stored value. Before
        merge_indexd() is called, the mocked indexd response for the file
        is built from the same input properties.
        """
        # Set up the entities a genomic file depends on
        self._create_save_dependents()

        self.assertEqual(Participant.query.count(), 1)
        self.assertEqual(Biospecimen.query.count(), 2)

        seq_exp = SequencingExperiment.query.all()[0]

        # Expected properties of every genomic file, keyed on its kf_id
        expected_by_id = {}
        for idx in range(2):
            props = {
                'external_id': 'genomic_file_{}'.format(idx),
                'file_name': 'file_{}'.format(idx),
                'data_type': 'submitted aligned reads',
                'file_format': '.cram',
                'urls': ['s3://file_{}'.format(idx)],
                'hashes': {'md5': str(uuid.uuid4())},
                'controlled_access': True,
                'is_harmonized': True,
                'reference_genome': 'Test01',
                'paired_end': 1,
                'availability': 'Immediate Download',
                'sequencing_experiment_id': seq_exp.kf_id
            }
            # Stage the genomic file and flush before reading its kf_id
            genomic_file = GenomicFile(**props)
            db.session.add(genomic_file)
            db.session.flush()
            new_id = genomic_file.kf_id
            props['kf_id'] = new_id
            expected_by_id[new_id] = props
        db.session.commit()

        # Compare each input value with what was persisted
        self.indexd.Session().get.side_effect = None
        for kf_id, props in expected_by_id.items():
            # Document the mocked indexd returns for this file
            indexd_doc = {
                field: props[field]
                for field in ('file_name', 'urls', 'hashes')
            }
            self.indexd.Session().get.return_value = MockResp(resp=indexd_doc)

            stored = GenomicFile.query.get(kf_id)
            stored.merge_indexd()
            for attr, expected in props.items():
                self.assertEqual(getattr(stored, attr), expected)
Exemplo n.º 2
0
    def _create_genomic_files(self, total):
        """
        Creates genomic files with sequencing experiments

        Every file gets a random size between 1000 and 5000 (inclusive)
        multiplied by MB_TO_BYTES, a random data type, file format and
        controlled access value drawn from the lists on self, and the
        kf_id of a randomly chosen existing sequencing experiment.

        The files are only constructed here; this method does not add
        them to the db session.

        :param total: number of genomic files to create
        :returns: list of GenomicFile objects
        """
        max_size_mb = 5000
        min_size_mb = 1000

        # Nothing in the loop below touches the session, so the set of
        # sequencing experiments is fetched once instead of once per file
        seq_exps = SequencingExperiment.query.all()

        gf_list = []
        for i in range(total):
            kwargs = {
                'file_name': 'file_{}'.format(i),
                'size':
                (random.randint(min_size_mb, max_size_mb) * MB_TO_BYTES),
                'data_type': random.choice(self.data_type_list),
                'file_format': random.choice(self.file_format_list),
                'urls': ['s3://file_{}'.format(i)],
                'controlled_access':
                random.choice(self.controlled_access_list),
                'hashes': {
                    # md5-like value: uuid hex digits without the dashes
                    'md5': str(uuid.uuid4()).replace('-', '')
                }
            }
            se = random.choice(seq_exps)
            gf_list.append(
                GenomicFile(**kwargs, sequencing_experiment_id=se.kf_id))
        return gf_list
    def test_post(self):
        """
        Test create a new read_group_genomic_file

        Posts a link between a freshly saved read group and genomic file,
        then expects a 201 response whose results carry a kf_id and
        exactly one ReadGroupGenomicFile row in the database.
        """
        # Create needed entities
        gf = GenomicFile(external_id='gf0')
        rg = ReadGroup(external_id='rg0')
        db.session.add_all([gf, rg])
        db.session.commit()

        kwargs = {
            'read_group_id': rg.kf_id,
            'genomic_file_id': gf.kf_id,
            'external_id': 'rg0-gf0'
        }

        # Send post request
        response = self.client.post(url_for(RG_GF_LIST_URL),
                                    data=json.dumps(kwargs),
                                    headers=self._api_headers())

        # Check response status code
        self.assertEqual(response.status_code, 201)

        # Check response content
        response = json.loads(response.data.decode('utf-8'))
        # TestCase assertion instead of a bare assert, so the check is not
        # stripped when Python runs with -O
        self.assertTrue(response['results']['kf_id'])
        self.assertEqual(1, ReadGroupGenomicFile.query.count())
    def _create_save_to_db(self):
        """
        Create two read groups and two genomic files and link every read
        group to every genomic file

        :returns: tuple of (read groups, genomic files)
        """
        read_groups = []
        genomic_files = []
        for idx in range(2):
            genomic_files.append(GenomicFile(external_id='gf{}'.format(idx)))
            read_groups.append(ReadGroup(external_id='rg{}'.format(idx)))

        # One link per (genomic file, read group) combination
        for gf_idx, rg_idx in ((0, 0), (0, 1), (1, 0), (1, 1)):
            link = ReadGroupGenomicFile(
                genomic_file=genomic_files[gf_idx],
                read_group=read_groups[rg_idx],
                external_id='rg{}-gf{}'.format(rg_idx, gf_idx))
            db.session.add(link)

        db.session.commit()

        return read_groups, genomic_files
Exemplo n.º 5
0
    def test_update(self, mock):
        """
        Test re-pointing a task/genomic file link at a new genomic file

        :param mock: object whose Session() gets the MockIndexd handlers
            (presumably injected by a patch decorator not visible here)
        """
        # Route the session's post/get/put through the indexd mock
        indexd = MockIndexd()
        for verb in ('post', 'get', 'put'):
            handler = getattr(indexd, verb)
            setattr(mock.Session(), verb, handler)

        # Tasks and everything they depend on
        _participants, _tasks = self._create_and_save_tasks()

        # A new genomic file on the first biospecimen of participant Fred
        fred = Participant.query.filter_by(external_id='Fred').one()
        new_file = GenomicFile(data_type='slide_image',
                               file_name='slide_image1')
        fred.biospecimens[0].genomic_files.append(new_file)
        db.session.commit()

        # Move an existing link from its genomic file to the new one
        link = TaskGenomicFile.query.first()
        task_id = link.task_id
        old_file_id = link.genomic_file_id

        link.genomic_file_id = new_file.kf_id
        db.session.commit()

        # The task now has the new file and no longer the old one
        task = Task.query.get(task_id)
        old_file = GenomicFile.query.get(old_file_id)
        self.assertNotIn(old_file, task.genomic_files)
        self.assertIn(new_file, task.genomic_files)
        self.assertEqual(9, GenomicFile.query.count())
        self.assertEqual(16, TaskGenomicFile.query.count())
    def test_post(self):
        """
        Test create a new sequencing_experiment_genomic_file

        Posts a link between a freshly saved sequencing experiment and
        genomic file, then expects a 201 response whose results carry a
        kf_id and exactly one SequencingExperimentGenomicFile row in the
        database.
        """
        # Create needed entities
        gf = GenomicFile(external_id='gf0')
        sc = SequencingCenter(name='sc')
        se = SequencingExperiment(external_id='se0',
                                  experiment_strategy='WGS',
                                  is_paired_end=True,
                                  platform='platform',
                                  sequencing_center=sc)
        db.session.add_all([gf, se])
        db.session.commit()

        kwargs = {'sequencing_experiment_id': se.kf_id,
                  'genomic_file_id': gf.kf_id,
                  'external_id': 'se0-gf0'
                  }

        # Send post request
        response = self.client.post(url_for(SE_GF_LIST_URL),
                                    data=json.dumps(kwargs),
                                    headers=self._api_headers())

        # Check response status code
        self.assertEqual(response.status_code, 201)

        # Check response content
        response = json.loads(response.data.decode('utf-8'))
        # TestCase assertion instead of a bare assert, so the check is not
        # stripped when Python runs with -O
        self.assertTrue(response['results']['kf_id'])
        self.assertEqual(1, SequencingExperimentGenomicFile.query.count())
Exemplo n.º 7
0
    def _create_save_genomic_files(self):
        """
        Persist two genomic files attached to the first biospecimen

        :returns: dict mapping each genomic file's kf_id to the properties
            it was created with, including the kf_id itself
        """
        # The dependent entities have to exist before the files
        self._create_save_dependents()

        parent = Biospecimen.query.all()[0]
        saved_props = {}
        for idx in range(2):
            size_mb = random.randint(MIN_SIZE_MB, MAX_SIZE_MB)
            props = {
                'external_id': 'genomic_file_{}'.format(idx),
                'file_name': 'file_{}'.format(idx),
                'size': size_mb * MB_TO_BYTES,
                'data_type': 'submitted aligned reads',
                'file_format': '.cram',
                'urls': ['s3://file_{}'.format(idx)],
                'controlled_access': True,
                'is_harmonized': True,
                'paired_end': 1,
                'reference_genome': 'Test01',
                'hashes': {'md5': str(uuid.uuid4())},
                'availability': 'Immediate Download'
            }
            # Attach to the biospecimen, then flush before reading kf_id
            genomic_file = GenomicFile(**props)
            parent.genomic_files.append(genomic_file)
            db.session.add(genomic_file)
            db.session.flush()
            props['kf_id'] = genomic_file.kf_id
            saved_props[genomic_file.kf_id] = props
        db.session.commit()

        return saved_props
    def test_update(self, mock):
        """
        Test re-pointing a cavatica_task/genomic file link at a new
        genomic file

        :param mock: object whose Session() gets the MockIndexd handlers
            (presumably injected by a patch decorator not visible here)
        """
        # Route the session's post/get/put through the indexd mock
        indexd = MockIndexd()
        for verb in ('post', 'get', 'put'):
            handler = getattr(indexd, verb)
            setattr(mock.Session(), verb, handler)

        # Cavatica tasks and everything they depend on
        _participants, _tasks = self._create_and_save_cavatica_tasks()
        seq_exp = SequencingExperiment.query.all()[0]

        # A new genomic file on the first biospecimen of participant Fred
        fred = Participant.query.filter_by(external_id='Fred').one()
        new_file = GenomicFile(data_type='slide_image',
                               file_name='slide_image1',
                               sequencing_experiment_id=seq_exp.kf_id)
        fred.biospecimens[0].genomic_files.append(new_file)
        db.session.commit()

        # Move an existing link from its genomic file to the new one
        link = CavaticaTaskGenomicFile.query.first()
        task_id = link.cavatica_task_id
        old_file_id = link.genomic_file_id

        link.genomic_file_id = new_file.kf_id
        db.session.commit()

        # The task now has the new file and no longer the old one
        task = CavaticaTask.query.get(task_id)
        old_file = GenomicFile.query.get(old_file_id)
        self.assertNotIn(old_file, task.genomic_files)
        self.assertIn(new_file, task.genomic_files)
        self.assertEqual(9, GenomicFile.query.count())
        self.assertEqual(16, CavaticaTaskGenomicFile.query.count())
    def _create_save_to_db(self):
        """
        Create two sequencing experiments and two genomic files and link
        every experiment to every file

        :returns: tuple of (sequencing experiments, genomic files)
        """
        # Reuse the "Baylor" sequencing center or create and save it
        center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
        if center is None:
            center = SequencingCenter(name="Baylor")
            db.session.add(center)
            db.session.commit()

        # Properties shared by both sequencing experiments
        se_props = {
            'experiment_strategy': 'WXS',
            'library_name': 'library',
            'library_strand': 'Unstranded',
            'is_paired_end': False,
            'platform': 'platform',
            'instrument_model': '454 GS FLX Titanium',
            'max_insert_size': 600,
            'mean_insert_size': 500,
            'mean_depth': 40,
            'total_reads': 800,
            'mean_read_length': 200
        }

        seq_exps = []
        genomic_files = []
        for idx in range(2):
            genomic_files.append(GenomicFile(external_id='gf{}'.format(idx)))
            seq_exps.append(
                SequencingExperiment(**se_props,
                                     sequencing_center=center,
                                     external_id='se{}'.format(idx)))

        # One link per (genomic file, sequencing experiment) combination
        for gf_idx, se_idx in ((0, 0), (0, 1), (1, 0), (1, 1)):
            link = SequencingExperimentGenomicFile(
                genomic_file=genomic_files[gf_idx],
                sequencing_experiment=seq_exps[se_idx],
                external_id='se{}-gf{}'.format(se_idx, gf_idx))
            db.session.add(link)

        db.session.commit()

        return seq_exps, genomic_files
Exemplo n.º 10
0
    def _create_all_entities(self):
        """
        Create 2 studies with genomic files and sequencing experiments

        Each study gets one participant with 3 biospecimens, one genomic
        file per biospecimen and 2 sequencing experiments: the first is
        linked to the first two genomic files, the second to the first
        and the last genomic file.

        :returns: tuple of (sequencing experiments per study, genomic
            files per study, list of studies); the dicts are keyed
            'study0' and 'study1'
        """
        center = SequencingCenter(name='sc')
        all_studies = []
        ses_by_study = {}
        gfs_by_study = {}
        for study_idx in range(2):
            study_key = 'study{}'.format(study_idx)
            study = Study(external_id='s{}'.format(study_idx))
            participant = Participant(external_id='p{}'.format(study_idx))
            study.participants.append(participant)

            # One genomic file for each of the three biospecimens
            study_gfs = gfs_by_study.setdefault(study_key, [])
            for bio_idx in range(3):
                biospecimen = Biospecimen(
                    external_sample_id='b{}'.format(bio_idx),
                    analyte_type='DNA',
                    sequencing_center=center,
                    participant=participant)
                genomic_file = GenomicFile(
                    external_id='study{}-gf{}'.format(study_idx, bio_idx),
                    urls=['s3://mybucket/key'],
                    hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
                study_gfs.append(genomic_file)
                biospecimen.genomic_files.append(genomic_file)

            study_ses = ses_by_study.setdefault(study_key, [])
            now = datetime.now()
            se_props = {
                'experiment_date': str(now.replace(tzinfo=tz.tzutc())),
                'experiment_strategy': 'WXS',
                'library_name': 'Test_library_name_1',
                'library_strand': 'Unstranded',
                'is_paired_end': False,
                'platform': 'Illumina',
                'instrument_model': '454 GS FLX Titanium',
                'max_insert_size': 600,
                'mean_insert_size': 500,
                'mean_depth': 40,
                'total_reads': 800,
                'mean_read_length': 200
            }
            # Files for se0 and se1 respectively
            files_per_se = (study_gfs[0:2], [study_gfs[0], study_gfs[-1]])
            for se_idx, files in enumerate(files_per_se):
                seq_exp = SequencingExperiment(
                    **se_props,
                    sequencing_center=center,
                    external_id='study{}-se{}'.format(study_idx, se_idx))
                seq_exp.genomic_files.extend(files)
                study_ses.append(seq_exp)

            all_studies.append(study)

        db.session.add_all(all_studies)
        db.session.commit()

        return ses_by_study, gfs_by_study, all_studies
def _create_all_entities():
    """
    Create 2 studies with genomic files, read groups and sequencing
    experiments

    Each study gets one participant with 3 biospecimens and one genomic
    file per biospecimen. Two read groups and two sequencing experiments
    are created per study; the first of each is linked to the first two
    genomic files, the second to the first and the last genomic file.

    :returns: tuple of (sequencing experiments per study, read groups per
        study, genomic files per study, list of studies); the dicts are
        keyed 'study0' and 'study1'
    """
    center = SequencingCenter(name='sc')
    all_studies = []
    ses_by_study = {}
    rgs_by_study = {}
    gfs_by_study = {}
    for study_idx in range(2):
        study_key = 'study{}'.format(study_idx)
        study = Study(external_id='s{}'.format(study_idx))
        participant = Participant(external_id='p{}'.format(study_idx))
        study.participants.append(participant)

        # One genomic file for each of the three biospecimens
        study_gfs = gfs_by_study.setdefault(study_key, [])
        for bio_idx in range(3):
            biospecimen = Biospecimen(
                external_sample_id='b{}'.format(bio_idx),
                analyte_type='DNA',
                sequencing_center=center,
                participant=participant)
            genomic_file = GenomicFile(
                external_id='study{}-gf{}'.format(study_idx, bio_idx),
                urls=['s3://mybucket/key', 'https://gen3.something.com/did'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            study_gfs.append(genomic_file)
            biospecimen.genomic_files.append(genomic_file)

        # File selections for the first and second read group / experiment
        first_files = study_gfs[0:2]
        second_files = [study_gfs[0], study_gfs[-1]]

        study_rgs = rgs_by_study.setdefault(study_key, [])
        new_rgs = []
        for rg_idx, files in enumerate((first_files, second_files)):
            read_group = ReadGroup(
                external_id='study{}-rg{}'.format(study_idx, rg_idx))
            read_group.genomic_files.extend(files)
            new_rgs.append(read_group)

        study_ses = ses_by_study.setdefault(study_key, [])
        new_ses = []
        for se_idx, files in enumerate((first_files, second_files)):
            seq_exp = SequencingExperiment(
                external_id='study{}-se{}'.format(study_idx, se_idx),
                experiment_strategy='WGS',
                is_paired_end=True,
                platform='platform',
                sequencing_center=center)
            seq_exp.genomic_files.extend(files)
            new_ses.append(seq_exp)

        study_rgs.extend(new_rgs)
        study_ses.extend(new_ses)
        all_studies.append(study)

    db.session.add_all(all_studies)
    db.session.commit()

    return ses_by_study, rgs_by_study, gfs_by_study, all_studies
    def _create_entities(self):
        """
        Create a study with one participant, one biospecimen, one
        sequencing experiment, four genomic files and a cavatica task

        Every genomic file is attached to both the biospecimen and the
        sequencing experiment; the files with an odd index are marked as
        harmonized. The cavatica task and the study are added to the
        session and committed.
        """
        # Reuse the "Baylor" sequencing center or create and save it
        center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
        if center is None:
            center = SequencingCenter(name="Baylor")
            db.session.add(center)
            db.session.commit()

        study = Study(external_id='phs001')
        participant = Participant(external_id='p0',
                                  is_proband=True,
                                  study=study)
        biospecimen = Biospecimen(analyte_type='dna',
                                  sequencing_center=center,
                                  participant=participant)
        seq_exp = SequencingExperiment(external_id='se',
                                       experiment_strategy='wgs',
                                       is_paired_end=True,
                                       platform='platform',
                                       sequencing_center=center)

        # Four genomic files shared by the biospecimen and the experiment
        for idx in range(4):
            genomic_file = GenomicFile(
                file_name='gf_{}'.format(idx),
                data_type='submitted aligned read',
                file_format='.cram',
                urls=['s3://file_{}'.format(idx)],
                hashes={'md5': str(uuid.uuid4())},
                is_harmonized=bool(idx % 2))
            biospecimen.genomic_files.append(genomic_file)
            seq_exp.genomic_files.append(genomic_file)

        task = self._create_cavatica_task('ct1')
        db.session.add(task)
        db.session.add(study)
        db.session.commit()
def genomic_files(client, entities):
    """
    Save EXPECTED_TOTAL - ENTITY_TOTAL identical genomic files and then
    detach every object from the session

    Neither `client` nor `entities` is used in the body.
    """
    how_many = EXPECTED_TOTAL - ENTITY_TOTAL
    new_files = []
    for _ in range(how_many):
        new_files.append(GenomicFile(external_id='genomic_file_0',
                                     file_name='hg38.bam',
                                     data_type='Aligned Reads',
                                     file_format='bam'))
    db.session.add_all(new_files)
    db.session.commit()
    db.session.expunge_all()
 def _create_genomic_file(self, _id, data_type='submitted aligned read',
                          sequencing_experiment_id=None, biospec_id=None):
     """
     Build a genomic file whose name and url are derived from _id

     The file is only constructed and returned; nothing here adds it to
     the db session. `biospec_id` is accepted but not used in the body.

     :param _id: value formatted into the file name and the s3 url
     :param data_type: data type stored on the genomic file
     :param sequencing_experiment_id: id stored on the genomic file
     :returns: the new GenomicFile
     """
     return GenomicFile(
         file_name='file_{}'.format(_id),
         data_type=data_type,
         file_format='.cram',
         urls=['s3://file_{}'.format(_id)],
         hashes={'md5': str(uuid.uuid4())},
         sequencing_experiment_id=sequencing_experiment_id)
    def _create_entities(self):
        """
        Create a study with one participant, one biospecimen, two
        sequencing experiments and four genomic files

        Genomic files with an odd index are appended to the first
        sequencing experiment, the others to the second; all four belong
        to the single biospecimen. Only the participant is added to the
        session explicitly before the commit.
        """
        study = Study(external_id='phs001')
        participant = Participant(external_id='p1',
                                  is_proband=True,
                                  study=study)

        # Reuse the "Baylor" sequencing center or create and save it
        center = SequencingCenter.query.filter_by(name="Baylor").one_or_none()
        if center is None:
            center = SequencingCenter(name="Baylor")
            db.session.add(center)
            db.session.commit()

        first_se = SequencingExperiment(**self._make_seq_exp('se1'),
                                        sequencing_center_id=center.kf_id)
        second_se = SequencingExperiment(**self._make_seq_exp('se2'),
                                         sequencing_center_id=center.kf_id)

        # NOTE(review): the participant and experiments have not been
        # flushed at this point, so their kf_id values may still be unset
        # when read below - confirm against the model defaults
        biospecimen = Biospecimen(external_sample_id='bio1',
                                  analyte_type='dna',
                                  participant_id=participant.kf_id,
                                  sequencing_center_id=center.kf_id)

        genomic_files = []
        for idx in range(4):
            genomic_file = GenomicFile(
                file_name='file_{}'.format(idx),
                data_type='submitted aligned read',
                file_format='.cram',
                urls=['s3://file_{}'.format(idx)],
                hashes={'md5': str(uuid.uuid4())},
                controlled_access=True,
                is_harmonized=True,
                reference_genome='Test01',
                sequencing_experiment_id=first_se.kf_id)
            # Odd indexes go to the first experiment, even to the second
            owner = first_se if idx % 2 else second_se
            owner.genomic_files.append(genomic_file)
            genomic_files.append(genomic_file)

        biospecimen.genomic_files = genomic_files
        participant.biospecimens = [biospecimen]
        db.session.add(participant)
        db.session.commit()
    def _create_all_entities(self):
        """
        Create 2 studies with genomic files and read groups

        Each study gets one participant with 3 biospecimens, one genomic
        file per biospecimen and 2 read groups: the first is linked to
        the first two genomic files, the second to the first and the last
        genomic file.

        :returns: tuple of (read groups per study, genomic files per
            study, list of studies); the dicts are keyed 'study0' and
            'study1'
        """
        center = SequencingCenter(name='sc')
        all_studies = []
        rgs_by_study = {}
        gfs_by_study = {}
        for study_idx in range(2):
            study_key = 'study{}'.format(study_idx)
            study = Study(external_id='s{}'.format(study_idx))
            participant = Participant(external_id='p{}'.format(study_idx))
            study.participants.append(participant)

            # One genomic file for each of the three biospecimens
            study_gfs = gfs_by_study.setdefault(study_key, [])
            for bio_idx in range(3):
                biospecimen = Biospecimen(
                    external_sample_id='b{}'.format(bio_idx),
                    analyte_type='DNA',
                    sequencing_center=center,
                    participant=participant)
                genomic_file = GenomicFile(
                    external_id='study{}-gf{}'.format(study_idx, bio_idx),
                    urls=['s3://mybucket/key'],
                    hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
                study_gfs.append(genomic_file)
                biospecimen.genomic_files.append(genomic_file)

            study_rgs = rgs_by_study.setdefault(study_key, [])

            # Files for rg0 and rg1 respectively
            files_per_rg = (study_gfs[0:2], [study_gfs[0], study_gfs[-1]])
            new_rgs = []
            for rg_idx, files in enumerate(files_per_rg):
                read_group = ReadGroup(
                    external_id='study{}-rg{}'.format(study_idx, rg_idx))
                read_group.genomic_files.extend(files)
                new_rgs.append(read_group)

            study_rgs.extend(new_rgs)
            all_studies.append(study)

        db.session.add_all(all_studies)
        db.session.commit()

        return rgs_by_study, gfs_by_study, all_studies
    def participants(client):
        """
        Fill the database with interlinked entities

        Creates 101 studies, 101 cavatica apps, 101 study files (all on
        Study_0), 102 investigators (each linked to Study_0 and Study_1),
        101 families and 102 participants. The first 50 participants are
        placed in Family_0/Study_0, the remaining ones in
        Family_1/Study_1. Every participant gets a diagnosis, an outcome,
        a phenotype, a sequencing experiment, a biospecimen, a genomic
        file, a read group and a cavatica task. Family relationships are
        then added for the participant pairs yielded by iterate_pairwise
        and everything is committed.

        `client` is not used inside the body.
        """

        # Add a bunch of studies for pagination
        for i in range(101):
            s = Study(external_id='Study_{}'.format(i))
            db.session.add(s)

        # Add a bunch of cavatica apps. `ca` is used again further down,
        # after this loop has ended, where it refers to the last app
        # created here
        for i in range(101):
            ca = CavaticaApp(name='app', revision=0)
            db.session.add(ca)

        # Add a bunch of study files
        s0 = Study.query.filter_by(external_id='Study_0').one()
        s1 = Study.query.filter_by(external_id='Study_1').one()
        for i in range(101):
            sf = StudyFile(file_name='blah', study_id=s0.kf_id)
            db.session.add(sf)

        # Add a bunch of investigators
        for _ in range(102):
            inv = Investigator(name='test')
            inv.studies.extend([s0, s1])
            db.session.add(inv)

        # Add a bunch of families
        families = []
        for i in range(101):
            families.append(Family(external_id='Family_{}'.format(i)))
        db.session.add_all(families)
        db.session.flush()

        participants = []
        f0 = Family.query.filter_by(external_id='Family_0').one()
        f1 = Family.query.filter_by(external_id='Family_1').one()
        # Placeholder only; reassigned on every pass of the loop below
        seq_cen = None
        for i in range(102):
            # First 50 participants: Family_0/Study_0, the rest:
            # Family_1/Study_1
            f = f0 if i < 50 else f1
            s = s0 if i < 50 else s1
            data = {
                'external_id': "test",
                'is_proband': True,
                'race': 'Asian',
                'ethnicity': 'Hispanic or Latino',
                'diagnosis_category': 'Cancer',
                'gender': 'Male'
            }
            p = Participant(**data, study_id=s.kf_id, family_id=f.kf_id)
            diag = Diagnosis()
            p.diagnoses = [diag]
            outcome = Outcome()
            p.outcomes = [outcome]
            phen = Phenotype()
            p.phenotypes = [phen]
            participants.append(p)
            db.session.add(p)
            db.session.flush()

            seq_data = {
                'external_id': 'Seq_0',
                'experiment_strategy': 'WXS',
                'library_name': 'Test_library_name_1',
                'library_strand': 'Unstranded',
                'is_paired_end': False,
                'platform': 'Test_platform_name_1'
            }
            gf_kwargs = {
                'external_id': 'gf_0',
                'file_name': 'hg38.fq',
                'data_type': 'Aligned Reads',
                'file_format': 'fastq',
                'size': 1000,
                'urls': ['s3://bucket/key'],
                'hashes': {
                    'md5': str(uuid.uuid4())
                },
                'controlled_access': False
            }
            # Look up the "Baylor" sequencing center, creating it when the
            # lookup finds nothing; it is flushed right away because its
            # kf_id is read just below
            seq_cen = SequencingCenter.query.filter_by(name="Baylor")\
                .one_or_none()
            if seq_cen is None:
                seq_cen = SequencingCenter(external_id='SC_0', name="Baylor")
                db.session.add(seq_cen)
                db.session.flush()
            seq_exp = SequencingExperiment(**seq_data,
                                           sequencing_center_id=seq_cen.kf_id)
            db.session.add(seq_exp)
            samp = Biospecimen(analyte_type='an analyte',
                               sequencing_center_id=seq_cen.kf_id,
                               participant=p)
            db.session.add(samp)
            p.biospecimens = [samp]

            # NOTE(review): seq_exp has been added but not flushed when
            # its kf_id is read here - confirm the id is already set
            gf = GenomicFile(**gf_kwargs,
                             sequencing_experiment_id=seq_exp.kf_id)
            db.session.add(gf)
            samp.genomic_files.append(gf)
            samp.diagnoses.append(diag)

            db.session.flush()

            # The read group and cavatica task are never passed to
            # db.session.add(); presumably they are saved through their
            # links to gf and ca - verify against the model cascades
            rg = ReadGroup(lane_number=4, flow_cell='FL0123')
            rg.genomic_files.append(gf)

            ct = CavaticaTask(name='task_{}'.format(i))
            ct.genomic_files.append(gf)
            # `ca` is the last CavaticaApp from the loop near the top, so
            # every task ends up on that one app
            ca.cavatica_tasks.append(ct)

        # Family relationships
        for participant1, participant2 in iterate_pairwise(participants):
            gender = participant1.gender
            rel = 'mother'
            # NOTE(review): participants above are created with gender
            # 'Male', while this compares against lowercase 'male'; unless
            # Participant normalizes the value, rel always stays 'mother'
            # - confirm this is intended
            if gender == 'male':
                rel = 'father'
            r = FamilyRelationship(participant1=participant1,
                                   participant2=participant2,
                                   participant1_to_participant2_relation=rel)
            db.session.add(r)

        db.session.commit()
Exemplo n.º 18
0
    def _create_all_entities(self):
        """
        Create 2 studies with same content
        Content: 3 participants, 4 biospecimens, 4 diagnoses

        Per study: participant 0 has 2 biospecimens and 3 diagnoses,
        participant 1 has 1 biospecimen and 1 diagnosis, participant 2 has
        1 biospecimen and no diagnosis. After the studies are committed,
        BiospecimenDiagnosis links are created for participants 0 and 1 of
        the first study only.

        Nothing is returned.
        """
        # Create entities
        # Reuse a sequencing center named 'sc' if one is stored already;
        # a new one is not added to the session explicitly (presumably it
        # is saved through the biospecimens that reference it - verify)
        sc = SequencingCenter.query.filter_by(name='sc').first()
        if not sc:
            sc = SequencingCenter(name='sc')
        studies = []
        # Two studies
        for j in range(2):
            s = Study(external_id='s{}'.format(j))
            p0 = Participant(external_id='study{}-p0'.format(j))
            p1 = Participant(external_id='study{}-p1'.format(j))
            p2 = Participant(external_id='study{}-p2'.format(j))

            # Participant 0
            # Has 2 Biospecimens
            # This genomic file is appended to both biospecimens in the
            # loop below
            gf = GenomicFile(
                external_id='study{}-b0-gf0'.format(j),
                urls=['s3://mybucket/key'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            for i in range(2):
                b = Biospecimen(external_sample_id='study{}-p0-b{}'.format(
                    j, i),
                                analyte_type='DNA',
                                sequencing_center=sc)
                b.genomic_files.append(gf)
                # Biospecimen b0 has 2 diagnoses
                if i == 0:
                    for k in range(2):
                        d = Diagnosis(
                            external_id='study{}-p0-d{}'.format(j, k))
                        p0.diagnoses.append(d)
                # Biospecimen b1 has 1 diagnosis
                else:
                    # `k` still holds its final value (1) from the loop in
                    # the i == 0 pass, so k + 1 produces the suffix 2 for
                    # both the diagnosis and the extra genomic file
                    d = Diagnosis(
                        external_id='study{}-p0-d{}'.format(j, k + 1))
                    # `gf` is rebound here: b1 gets a second genomic file
                    # of its own in addition to the shared one above
                    gf = GenomicFile(
                        external_id='study{}-b0-gf{}'.format(j, k + 1),
                        urls=['s3://mybucket/key'],
                        hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
                    b.genomic_files.append(gf)
                    p0.diagnoses.append(d)
                p0.biospecimens.append(b)

            # Participant 1
            # Has 1 biospecimen, 1 diagnosis
            b = Biospecimen(external_sample_id='study{}-p1-b0'.format(j),
                            analyte_type='DNA',
                            sequencing_center=sc)
            d = Diagnosis(external_id='study{}-p1-d0'.format(j))
            gf = GenomicFile(
                external_id='study{}-b1-gf0'.format(j),
                urls=['s3://mybucket/key'],
                hashes={'md5': 'd418219b883fce3a085b1b7f38b01e37'})
            b.genomic_files.append(gf)
            p1.biospecimens.append(b)
            p1.diagnoses.append(d)

            # Participant 2
            # Has 1 biospecimen
            b = Biospecimen(external_sample_id='study{}-p2-b0'.format(j),
                            analyte_type='DNA',
                            sequencing_center=sc)
            p2.biospecimens.append(b)

            s.participants.extend([p0, p1, p2])
            studies.append(s)

        # Committed before the links are built because the links below
        # reference the biospecimens and diagnoses by kf_id
        db.session.add_all(studies)
        db.session.commit()

        # Create links between bios and diags
        # Only the first study (studies[0]) gets links
        bs_dgs = []

        # Participant 0
        p0 = studies[0].participants[0]
        # b0-d0
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p0.biospecimens[0].kf_id,
                                 diagnosis_id=p0.diagnoses[0].kf_id))
        # b0-d1
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p0.biospecimens[0].kf_id,
                                 diagnosis_id=p0.diagnoses[1].kf_id))
        # b1-d2
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p0.biospecimens[1].kf_id,
                                 diagnosis_id=p0.diagnoses[2].kf_id))
        # b0-d2 (d2 is therefore linked to both b0 and b1)
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p0.biospecimens[0].kf_id,
                                 diagnosis_id=p0.diagnoses[2].kf_id))

        # Participant 1
        p1 = studies[0].participants[1]
        # b0-d0
        bs_dgs.append(
            BiospecimenDiagnosis(biospecimen_id=p1.biospecimens[0].kf_id,
                                 diagnosis_id=p1.diagnoses[0].kf_id))

        db.session.add_all(bs_dgs)
        db.session.commit()