Пример #1
0
 def test_sra_metadata_is_harmonized(self):
     """Check the harmonized fields set on a Sample from run SRR3098582.

     The metadata gathered for the run is applied to a fresh, unsaved
     Sample and the resulting treatment, subject and specimen_part
     attributes are compared against known values.
     """
     run_metadata = SraSurveyor.gather_all_metadata("SRR3098582")
     harmonized_sample = Sample()
     SraSurveyor._apply_harmonized_metadata_to_sample(harmonized_sample, run_metadata)

     expected_fields = (
         ("treatment", "biliatresone"),
         ("subject", "liver"),
         ("specimen_part", "liver"),
     )
     for field_name, expected_value in expected_fields:
         self.assertEqual(getattr(harmonized_sample, field_name), expected_value)
Пример #2
0
    def test_survey_unmated_reads(self, mock_send_job):
        """Survey experiment SRP048683, which has unmated reads.

        Beyond the experiment accession and the number of samples, this
        also confirms that the md5 and size for one file of one sample
        were recorded on its OriginalFile.
        """
        job = SurveyJob(source_type="SRA")
        job.save()
        SurveyJobKeyValue(
            survey_job=job, key="experiment_accession_code", value="SRP048683"
        ).save()

        experiment, samples = SraSurveyor(job).discover_experiment_and_samples()

        self.assertEqual(experiment.accession_code, "SRP048683")
        self.assertEqual(len(samples), 12)

        # Collect the file names of a single sample and spot-check the
        # recorded md5/size of one of its files.
        seen_file_names = set()
        for sample in samples:
            if sample.accession_code != "SRR1603661":
                continue

            for original_file in sample.original_files.all():
                seen_file_names.add(original_file.source_filename)
                if original_file.source_filename != "SRR1603661_1.fastq.gz":
                    continue

                self.assertEqual(
                    original_file.expected_md5, "502a9a482bfa5aa75865ccc0105ad13c"
                )
                self.assertEqual(original_file.expected_size_in_bytes, 6751980628)

        self.assertEqual({"SRR1603661_1.fastq.gz", "SRR1603661_2.fastq.gz"}, seen_file_names)
Пример #3
0
 def test_get_next_accession(self):
     """get_next_accession returns the run accession that follows the input.

     Covers DRR accessions with six to nine digits, including one where
     the increment carries into the next digit (789 -> 790).
     """
     accession_pairs = (
         ("DRR123456", "DRR123457"),
         ("DRR1234567", "DRR1234568"),
         ("DRR12345678", "DRR12345679"),
         ("DRR123456789", "DRR123456790"),
     )
     for current_accession, following_accession in accession_pairs:
         self.assertEqual(
             SraSurveyor.get_next_accession(current_accession), following_accession
         )
Пример #4
0
    def test_survey(self):
        """Run the SRA surveyor on self.survey_job and check what it stored.

        Exactly one Sample is expected, and the first entry of its
        protocol_info must carry the experiment's protocol description.
        """
        SraSurveyor(self.survey_job).discover_experiment_and_samples()

        discovered_samples = Sample.objects.all()

        # The survey should have produced a single sample.
        self.assertEqual(discovered_samples.count(), 1)

        # The sample's protocol_info must mirror the experiment's description.
        first_experiment = Experiment.objects.all().first()
        first_sample = discovered_samples.first()
        self.assertEqual(
            first_sample.protocol_info[0]["Description"],
            first_experiment.protocol_description,
        )
Пример #5
0
    def test_batch_created(self, mock_get):
        """Discover batches for a one-accession range and inspect the result.

        The start and end of the range are both RUN_ACCESSION, so a single
        batch is expected. Its attributes and those of its first file are
        compared against known values.
        """
        mock_get.side_effect = mocked_requests_get

        job = SurveyJob(source_type="SRA")
        job.save()
        # Same run accession on both ends gives a range of length 1.
        for range_key in ("start_accession", "end_accession"):
            SurveyJobKeyValue(survey_job=job, key=range_key, value=RUN_ACCESSION).save()

        surveyor = SraSurveyor(job)

        self.assertTrue(surveyor.discover_batches())
        # One run accession means one batch.
        self.assertEqual(len(surveyor.batches), 1)

        only_batch = surveyor.batches[0]
        self.assertEqual(only_batch.survey_job.id, job.id)

        expected_batch_fields = (
            ("source_type", "SRA"),
            ("pipeline_required", "SALMON"),
            ("platform_accession_code", "IlluminaHiSeq2000"),
            ("experiment_accession_code", "DRX001563"),
            ("experiment_title", "Illumina HiSeq 2000 sequencing; Exp_Gg_HH16_1_embryo_mRNAseq"),
            ("status", "NEW"),
            ("release_date", "2013-07-19"),
            ("last_uploaded_date", "2017-08-11"),
            ("organism_id", 9031),
            ("organism_name", "GALLUS GALLUS"),
        )
        for field_name, expected_value in expected_batch_fields:
            self.assertEqual(getattr(only_batch, field_name), expected_value)

        first_file = only_batch.files[0]
        expected_file_fields = (
            ("size_in_bytes", -1),
            (
                "download_url",
                "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/DRR002/DRR002116/DRR002116.fastq.gz",
            ),
            ("raw_format", "fastq.gz"),
            ("processed_format", "tar.gz"),
            ("name", "DRR002116.fastq.gz"),
            ("internal_location", "IlluminaHiSeq2000/SALMON"),
        )
        for field_name, expected_value in expected_file_fields:
            self.assertEqual(getattr(first_file, field_name), expected_value)
Пример #6
0
    def test_arrayexpress_alternate_accession(self):
        """ENA experiment ERP108370 must report its ArrayExpress alternate accession."""
        job = SurveyJob(source_type="SRA")
        job.save()
        SurveyJobKeyValue(
            survey_job=job, key="experiment_accession_code", value="ERP108370"
        ).save()

        # Only the experiment is inspected; the samples are ignored.
        surveyed_experiment, _ = SraSurveyor(job).discover_experiment_and_samples()

        self.assertEqual(surveyed_experiment.accession_code, "ERP108370")
        self.assertEqual(surveyed_experiment.alternate_accession_code, "E-MTAB-6681")
Пример #7
0
    def test_sra_metadata_is_harmonized(self):
        """Apply the metadata of run SRR3098582 to a Sample and an Experiment.

        The sample's harmonized treatment/subject/specimen_part and the
        experiment's title and source dates are compared to known values.
        Neither object is saved.
        """
        run_metadata = SraSurveyor.gather_all_metadata("SRR3098582")

        harmonized_sample = Sample()
        SraSurveyor._apply_harmonized_metadata_to_sample(harmonized_sample, run_metadata)
        expected_sample_fields = (
            ("treatment", "biliatresone"),
            ("subject", "liver"),
            ("specimen_part", "liver"),
        )
        for field_name, expected_value in expected_sample_fields:
            self.assertEqual(getattr(harmonized_sample, field_name), expected_value)

        populated_experiment = Experiment()
        SraSurveyor._apply_metadata_to_experiment(populated_experiment, run_metadata)
        expected_title = (
            "Transcriptional profiling through RNA-seq of zebrafish larval"
            " liver after exposure to biliatresone, a biliary toxin."
        )
        self.assertEqual(populated_experiment.title, expected_title)

        published_on = datetime.date(2017, 9, 25)
        self.assertEqual(populated_experiment.source_first_published, published_on)
        self.assertEqual(populated_experiment.source_last_modified, datetime.date(2017, 9, 25))
Пример #8
0
def _get_surveyor_for_source(survey_job: SurveyJob):
    """Factory method for ExternalSourceSurveyors.

    Returns the surveyor matching survey_job.source_type:
    ArrayExpressSurveyor for "ARRAY_EXPRESS", SraSurveyor for "SRA" and
    TranscriptomeIndexSurveyor for "TRANSCRIPTOME_INDEX", each built
    with survey_job as its only argument.

    Raises SourceNotSupportedError for any other source type.
    """
    source_type = survey_job.source_type

    if source_type == "ARRAY_EXPRESS":
        return ArrayExpressSurveyor(survey_job)
    if source_type == "SRA":
        return SraSurveyor(survey_job)
    if source_type == "TRANSCRIPTOME_INDEX":
        return TranscriptomeIndexSurveyor(survey_job)

    # Build the message with str.format instead of "+": concatenation
    # raises TypeError when source_type is None or not a str, which
    # would hide the SourceNotSupportedError callers expect.
    raise SourceNotSupportedError("Source {} is not supported.".format(source_type))
Пример #9
0
    def test_discover_batches(self, mock_generate_batch):
        """discover_batches must generate a batch for each accession in the range.

        The survey job spans DRR012345 through DRR012348, so the mocked
        batch generator is expected to be called with those four
        accessions in order.
        """
        job = SurveyJob(source_type="SRA")
        job.save()

        accession_range = (
            ("start_accession", "DRR012345"),
            ("end_accession", "DRR012348"),
        )
        for range_key, accession in accession_range:
            SurveyJobKeyValue(survey_job=job, key=range_key, value=accession).save()

        SraSurveyor(job).discover_batches()

        expected_calls = [
            call(accession)
            for accession in ("DRR012345", "DRR012346", "DRR012347", "DRR012348")
        ]
        mock_generate_batch.assert_has_calls(expected_calls)
Пример #10
0
    def test_srp_survey(self, mock_send_job):
        """Survey several study accessions and check what is discovered.

        For each study a new SRA survey job is created and the resulting
        experiment's accession code, alternate accession code and number
        of samples are compared against known values.
        """
        # (experiment accession, expected alternate accession, expected sample count)
        surveyed_studies = (
            ("SRP068364", "GSE76780", 4),
            ("SRP111553", "GSE101204", 16),  # 8 samples with 2 runs each
            ("DRP003977", None, 9),
        )

        for accession_code, alternate_code, sample_count in surveyed_studies:
            job = SurveyJob(source_type="SRA")
            job.save()
            SurveyJobKeyValue(
                survey_job=job, key="experiment_accession_code", value=accession_code
            ).save()

            experiment, samples = SraSurveyor(job).discover_experiment_and_samples()

            self.assertEqual(experiment.accession_code, accession_code)
            self.assertEqual(experiment.alternate_accession_code, alternate_code)
            self.assertEqual(len(samples), sample_count)
Пример #11
0
    def test_sra_harmony(self):
        """Harmonize the metadata of run SRR1533126 and check specific fields.

        The title, sex and age must match known values; specimen_part and
        disease only need to be present.
        """
        run_metadata = SraSurveyor.gather_all_metadata("SRR1533126")
        harmonized = self._harmonizer.harmonize_sample(run_metadata)

        expected_title = "Phosphaturic mesenchymal tumour (PMT) case 2 of NTUH"
        self.assertEqual(expected_title, harmonized["title"])

        # Keys whose value is checked: presence first, then the value itself.
        for key, expected_value in (("sex", "female"), ("age", 57.0)):
            self.assertIn(key, harmonized)
            self.assertEqual(expected_value, harmonized[key])

        # Keys that merely have to exist.
        for key in ("specimen_part", "disease"):
            self.assertIn(key, harmonized)
Пример #12
0
    def test_sra_lots(self):
        """Smoke test: gather and harmonize metadata for a range of SRA runs.

        Each accession must harmonize to something other than None.
        Accessions that raise UnsupportedDataTypeError are skipped.
        """
        # An accession list like this one can be produced at
        #    https://www.ncbi.nlm.nih.gov/sra
        # by searching for
        #    (human) NOT cluster_dbgap[PROP]
        # and then choosing Send To -> File -> Accession List.
        run_accessions = (
            "ERR188021",
            "ERR188022",
            "ERR205021",
            "ERR205022",
            "ERR205023",
            "SRR000001",  # Soft fail, bad platform
            "ERR1737666",
            "ERR030891",
            "ERR030892",
            "SRR1542948",
            "SRR1553477",
            "SRR1542330",
            "SRR1538698",
            "SRR1538760",
            "SRR1538866",
            "SRR1539218",
            "SRR1797277",
            "SRR1533126",
        )
        for run_accession in run_accessions:
            try:
                run_metadata = SraSurveyor.gather_all_metadata(run_accession)
                harmonized = harmonize_all_samples([run_metadata])
                self.assertIsNotNone(harmonized)
            except UnsupportedDataTypeError:
                # Unsupported runs are tolerated; move on to the next one.
                pass
Пример #13
0
    def test_queue_downloader_jobs_for_original_files(self, mock_send_task):
        """Make sure that queueing each sample's files creates the expected jobs.

        Builds one experiment with two samples and two original files per
        sample, calls queue_downloader_job_for_original_files once per
        sample with that sample's files, and expects exactly two
        DownloaderJobs to exist afterwards.
        """
        experiment_object = Experiment()
        experiment_object.accession_code = "Experiment1"
        experiment_object.save()

        # Two samples that differ only in their accession code.
        sample_objects = []
        for accession_code in ("Sample1", "Sample2"):
            sample_object = Sample()
            sample_object.accession_code = accession_code
            sample_object.platform_accession_code = "Illumina Genome Analyzer"
            sample_object.platform_accession_name = "Illumina Genome Analyzer"
            sample_object.technology = "RNA-SEQ"
            sample_object.manufacturer = "ILLUMINA"
            sample_object.source_database = "SRA"
            sample_object.save()
            sample_objects.append(sample_object)

        for sample_object in sample_objects:
            association = ExperimentSampleAssociation()
            association.experiment = experiment_object
            association.sample = sample_object
            association.save()

        # (source_url, source_filename) of the two files of each sample,
        # in the same order as sample_objects.
        file_specs_per_sample = (
            (("first_url", "first_filename"), ("second_url", "second_filename")),
            (("third_url", "third_filename"), ("fourth_url", "fourth_filename")),
        )

        original_files_per_sample = []
        for sample_object, file_specs in zip(sample_objects, file_specs_per_sample):
            # Every file is collected in the list of the sample it is
            # associated with, so each list holds that sample's two files.
            sample_original_files = []
            for source_url, source_filename in file_specs:
                original_file = OriginalFile()
                original_file.source_url = source_url
                original_file.source_filename = source_filename
                original_file.is_downloaded = False
                original_file.has_raw = True
                original_file.save()
                sample_original_files.append(original_file)

                original_file_sample_association = OriginalFileSampleAssociation()
                original_file_sample_association.original_file = original_file
                original_file_sample_association.sample = sample_object
                original_file_sample_association.save()
            original_files_per_sample.append(sample_original_files)

        survey_job = SurveyJob(source_type="SRA")
        survey_job.save()
        surveyor = SraSurveyor(survey_job)

        for sample_original_files in original_files_per_sample:
            surveyor.queue_downloader_job_for_original_files(
                sample_original_files, experiment_object.accession_code
            )

        self.assertEqual(DownloaderJob.objects.all().count(), 2)
Пример #14
0
    def test_no_repeat_jobs(self):
        """Make sure files that already have a DownloaderJob are not queued again.

        One sample with two original files is created and both files are
        attached to an existing DownloaderJob. After calling
        queue_downloader_job_for_original_files with those files there
        must still be exactly one DownloaderJob.
        """
        experiment_object = Experiment()
        experiment_object.accession_code = "Experiment1"
        experiment_object.save()

        sample_object = Sample()
        sample_object.accession_code = "Sample1"
        sample_object.platform_accession_code = "Illumina Genome Analyzer"
        sample_object.platform_accession_name = "Illumina Genome Analyzer"
        sample_object.technology = "RNA-SEQ"
        sample_object.manufacturer = "ILLUMINA"
        sample_object.source_database = "SRA"
        sample_object.save()

        # Two not-yet-downloaded raw files, both belonging to the sample.
        original_files = []
        for source_url, source_filename in (
            ("first_url", "first_filename"),
            ("second_url", "second_filename"),
        ):
            original_file = OriginalFile()
            original_file.source_url = source_url
            original_file.source_filename = source_filename
            original_file.is_downloaded = False
            original_file.has_raw = True
            original_file.save()

            sample_link = OriginalFileSampleAssociation()
            sample_link.original_file = original_file
            sample_link.sample = sample_object
            sample_link.save()

            original_files.append(original_file)

        # A pre-existing job that already covers both files.
        existing_job = DownloaderJob()
        existing_job.save()
        for original_file in original_files:
            DownloaderJobOriginalFileAssociation(
                downloader_job=existing_job, original_file=original_file
            ).save()

        survey_job = SurveyJob(source_type="SRA")
        survey_job.save()
        surveyor = SraSurveyor(survey_job)

        surveyor.queue_downloader_job_for_original_files(
            [original_files[0], original_files[1]], experiment_object.accession_code
        )

        # The only DownloaderJob is the one created above; queueing had
        # nothing left to do, so the count must still be one.
        self.assertEqual(1, DownloaderJob.objects.all().count())
    def handle(self, *args, **options):
        """Refresh the metadata of all experiments, or of those from one source database.

        When the "source_database" option is missing or None every
        Experiment is refreshed. When it is one of ARRAY_EXPRESS, GEO or
        SRA only experiments from that database are refreshed. Any other
        value is logged as an error and the process exits with status 1.

        Experiments are processed one page of PAGE_SIZE at a time, with a
        five minute pause between pages. An exception raised while
        refreshing one experiment is logged and the run continues with
        the next experiment.
        """
        possible_source_databases = ["ARRAY_EXPRESS", "GEO", "SRA"]

        if options.get("source_database", None) is None:
            experiments = Experiment.objects.all()
        elif options["source_database"] in possible_source_databases:
            source_database = options["source_database"]
            experiments = Experiment.objects.filter(
                source_database=source_database)
        else:
            logger.error('Invalid source database "{}"'.format(
                options["source_database"]) +
                         "\nPossible source databases: {}".format(", ".join(
                             possible_source_databases)))
            sys.exit(1)

        paginator = PerformantPaginator(experiments, PAGE_SIZE)
        page = paginator.page()

        while True:
            for experiment in page.object_list:
                logger.debug("Refreshing metadata for an experiment.",
                             experiment=experiment.accession_code)
                try:
                    if experiment.source_database == "SRA":
                        # SRA metadata is looked up through the
                        # experiment's first sample.
                        metadata = SraSurveyor.gather_all_metadata(
                            experiment.samples.first().accession_code)
                        SraSurveyor._apply_metadata_to_experiment(
                            experiment, metadata)

                    elif experiment.source_database == "GEO":
                        gse = GEOparse.get_GEO(
                            experiment.accession_code,
                            destdir="/tmp/management",
                            silent=True,
                        )

                        GeoSurveyor._apply_metadata_to_experiment(
                            experiment, gse)

                    elif experiment.source_database == "ARRAY_EXPRESS":
                        request_url = EXPERIMENTS_URL + experiment.accession_code
                        experiment_request = utils.requests_retry_session(
                        ).get(request_url, timeout=60)
                        try:
                            parsed_json = experiment_request.json(
                            )["experiments"]["experiment"][0]
                        except KeyError:
                            # Log only the accession code: nothing in this
                            # method provides a survey job to report, and
                            # referencing one here would raise and hide
                            # this message.
                            logger.error(
                                "Remote experiment has no Experiment data!",
                                experiment_accession_code=experiment.
                                accession_code,
                            )
                            # Skip the save below; nothing was refreshed.
                            continue
                        ArrayExpressSurveyor._apply_metadata_to_experiment(
                            experiment, parsed_json)

                    experiment.save()

                # If there are any errors, just continue. It's likely that it's
                # just a problem with this experiment.
                except Exception:
                    logger.exception(
                        "exception caught while updating metadata for {}".
                        format(experiment.accession_code))

            if not page.has_next():
                break
            else:
                page = paginator.page(page.next_page_number())

            # Pause for five minutes between pages so the refresh does
            # not thrash the DB.
            time.sleep(60 * 5)
Пример #16
0
    def handle(self, *args, **options):
        """Refresh the metadata of all samples, or of those from one source database.

        When the "source_database" option is missing or None every Sample
        is refreshed. When it is one of ARRAY_EXPRESS, GEO or SRA only
        samples from that database are refreshed. Any other value is
        logged as an error and the process exits with status 1.

        Samples are processed one page of PAGE_SIZE at a time, with a
        five minute pause between pages. Exceptions raised while
        refreshing a sample are not caught here.
        """
        possible_source_databases = ["ARRAY_EXPRESS", "GEO", "SRA"]

        if options.get("source_database", None) is None:
            samples = Sample.objects.all()
        elif options["source_database"] in possible_source_databases:
            source_database = options["source_database"]
            samples = Sample.objects.filter(source_database=source_database)
        else:
            logger.error('Invalid source database "{}"'.format(
                options["source_database"]) +
                         "\nPossible source databases: {}".format(", ".join(
                             possible_source_databases)))
            sys.exit(1)

        # Loop-invariant, so it is defined once instead of per sample.
        SDRF_URL_TEMPLATE = (
            "https://www.ebi.ac.uk/arrayexpress/files/{code}/{code}.sdrf.txt"
        )

        paginator = PerformantPaginator(samples, PAGE_SIZE)
        page = paginator.page()

        while True:
            # Walk only the current page. Iterating the whole queryset
            # here would reprocess every sample once for each page.
            for sample in page.object_list:
                logger.debug("Refreshing metadata for a sample.",
                             sample=sample.accession_code)
                if sample.source_database == "SRA":
                    metadata = SraSurveyor.gather_all_metadata(
                        sample.accession_code)
                    SraSurveyor._apply_harmonized_metadata_to_sample(
                        sample, metadata)
                elif sample.source_database == "GEO":
                    # GEO metadata is fetched for the sample's first
                    # experiment, then looked up by the sample's title.
                    gse = GEOparse.get_GEO(
                        sample.experiments.first().accession_code,
                        destdir="/tmp/management",
                        how="brief",
                        silent=True,
                    )
                    preprocessed_samples = harmony.preprocess_geo(
                        gse.gsms.items())
                    harmonized_samples = harmony.harmonize(
                        preprocessed_samples)
                    GeoSurveyor._apply_harmonized_metadata_to_sample(
                        sample, harmonized_samples[sample.title])
                elif sample.source_database == "ARRAY_EXPRESS":
                    sdrf_url = SDRF_URL_TEMPLATE.format(
                        code=sample.experiments.first().accession_code)
                    sdrf_samples = harmony.parse_sdrf(sdrf_url)
                    harmonized_samples = harmony.harmonize(sdrf_samples)
                    ArrayExpressSurveyor._apply_harmonized_metadata_to_sample(
                        sample, harmonized_samples[sample.title])

                sample.save()

            if not page.has_next():
                break
            else:
                page = paginator.page(page.next_page_number())

            # Pause for five minutes between pages so the refresh does
            # not thrash the DB.
            time.sleep(60 * 5)
Пример #17
0
    def test_metadata_is_gathered_correctly(self):
        """Compare the metadata gathered for run DRR002116 to known values.

        Every listed key must be present with exactly the expected string.
        The NCBI file URL built from the run accession must be one of the
        accepted URLs.
        """
        metadata = SraSurveyor.gather_all_metadata("DRR002116")

        # (metadata key, expected value), checked in this order.
        expected_metadata = (
            ("broker_name", "DDBJ"),
            ("center_name", "RIKEN_CDB"),
            ("ena-base-count", "158881910957"),
            ("ena-spot-count", "1371813555"),
            ("experiment_accession", "DRX001563"),
            (
                "experiment_design_description",
                "Experiment for mRNAseq of chicken at stage HH16 (biological replicate 1)",
            ),
            (
                "experiment_title",
                "Illumina HiSeq 2000 sequencing; Exp_Gg_HH16_1_embryo_mRNAseq",
            ),
            (
                "lab_name",
                "Group for Morphological Evolution, Center for Developmental "
                "Biology, Kobe Institute, RIKEN",
            ),
            ("library_layout", "SINGLE"),
            ("library_name", "Gg_HH16_1_embryo_mRNAseq"),
            ("library_selection", "RANDOM"),
            ("library_source", "TRANSCRIPTOMIC"),
            ("library_strategy", "RNA-Seq"),
            ("organism_id", "9031"),
            ("organism_name", "GALLUS GALLUS"),
            ("platform_instrument_model", "Illumina HiSeq 2000"),
            ("read_spec_0_base_coord", "1"),
            ("read_spec_0_class", "Application Read"),
            ("read_spec_0_index", "0"),
            ("read_spec_0_type", "Forward"),
            ("run_accession", "DRR002116"),
            ("run_center", "RIKEN_CDB"),
            ("run_date", "2011-09-01T00:00:00+09:00"),
            ("run_ena_base_count", "3256836000"),
            ("run_ena_first_public", "2013-07-19"),
            ("run_ena_last_update", "2017-08-11"),
            ("run_ena_spot_count", "32568360"),
            ("sample_accession", "DRS001521"),
            ("sample_center_name", "BioSample"),
            ("sample_ena_base_count", "3256836000"),
            ("sample_ena_first_public", "2013-07-20"),
            ("sample_ena_last_update", "2015-08-24"),
            ("sample_ena_spot_count", "32568360"),
            (
                "sample_sample_comment",
                "mRNAseq of chicken at stage HH16 (biological replicate 1)",
            ),
            ("sample_sample_name", "DRS001521"),
            ("sample_title", "Gg_HH16_1_embryo_mRNAseq"),
            ("spot_length", "100"),
            ("study_ena_first_public", "2013-07-19"),
            ("study_ena_last_update", "2015-06-22"),
            ("study_accession", "DRP000595"),
            ("submission_accession", "DRA000567"),
            (
                "submission_comment",
                "Time course gene expression profiles of turtle "
                "(Pelodiscus sinensis) and chicken (Gallus gallus) "
                "embryos were examined. Whole transcriptome of turtle "
                "was also determined by uding stranded sequencing "
                "methods.",
            ),
            ("submission_title", "Submitted by RIKEN_CDB on 19-JUL-2013"),
        )
        for key, expected_value in expected_metadata:
            self.assertEqual(metadata[key], expected_value)

        built_url = SraSurveyor._build_ncbi_file_url(metadata["run_accession"])
        # NOTE(review): the "[email protected]" prefixes look like e-mail
        # obfuscation applied to the original user@host values (the first
        # two entries are now identical) - confirm against the real
        # expected URLs before relying on this check.
        accepted_urls = (
            "[email protected]:/sra/sra-instant/reads/ByRun/sra/DRR/DRR002/DRR002116/DRR002116.sra",
            "[email protected]:/sra/sra-instant/reads/ByRun/sra/DRR/DRR002/DRR002116/DRR002116.sra",
            "[email protected]:data/sracloud/traces/dra0/DRR/000002/DRR002116",
        )
        self.assertIn(built_url, accepted_urls)