Exemplo n.º 1
0
 def _gaFeatureForFeatureDbRecord(self, feature):
     """
     Builds a GA4GH protocol.Feature message from one feature record.

     :param feature: The DB row representing a feature. It is read with
         both ``feature[...]`` and ``feature.get(...)``; the 'child_ids'
         and 'attributes' entries are decoded as JSON.
     :return: the populated GA4GH protocol.Feature object
     """
     record = protocol.Feature()
     record.id = self.getCompoundIdForFeatureId(feature['id'])
     # A missing or falsy parent is encoded as the empty string.
     if not feature.get('parent_id'):
         record.parent_id = ""
     else:
         record.parent_id = self.getCompoundIdForFeatureId(
             feature['parent_id'])
     record.feature_set_id = self.getId()
     record.reference_name = pb.string(feature.get('reference_name'))
     record.start = pb.int(feature.get('start'))
     record.end = pb.int(feature.get('end'))
     record.name = pb.string(feature.get('name'))
     # Anything other than an explicit '-' is reported as positive strand.
     isNegative = feature.get('strand', '') == '-'
     record.strand = (
         protocol.NEG_STRAND if isNegative else protocol.POS_STRAND)
     childIds = json.loads(feature['child_ids'])
     record.child_ids.extend(
         [self.getCompoundIdForFeatureId(childId) for childId in childIds])
     record.feature_type.CopyFrom(
         self._ontology.getGaTermByName(feature['type']))
     attributes = json.loads(feature['attributes'])
     # TODO: Identify which values are ExternalIdentifiers and OntologyTerms
     for key in attributes:
         for value in attributes[key]:
             # vals[key] is only touched when there is a value to add.
             entry = record.attributes.vals[key].values.add()
             entry.string_value = value
     # The first 'gene_name' attribute value, if any, is the gene symbol.
     if 'gene_name' in attributes:
         geneNames = attributes['gene_name']
         if len(geneNames) > 0:
             record.gene_symbol = pb.string(geneNames[0])
     return record
Exemplo n.º 2
0
    def search_reference_sets(
            self, accession=None, md5checksum=None, assembly_id=None):
        """
        Searches for ReferenceSets matching the given optional filters.

        :param str accession: If not null, only reference sets whose
            `accession` equals this string (case-sensitive, exact match)
            are returned.
        :param str md5checksum: If not null, only reference sets whose
            `md5checksum` equals this string (case-sensitive, exact match)
            are returned. See
            :class:`ga4gh.protocol.ReferenceSet::md5checksum` for details.
        :param str assembly_id: If not null, only reference sets whose
            `assembly_id` equals this string (case-sensitive, exact match)
            are returned.
        :return: An iterator over the :class:`ga4gh.protocol.ReferenceSet`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchReferenceSetsRequest()
        # Optional filters go through the pb helpers before assignment.
        search_request.accession = pb.string(accession)
        search_request.md5checksum = pb.string(md5checksum)
        search_request.assembly_id = pb.string(assembly_id)
        search_request.page_size = pb.int(self._page_size)
        response_class = protocol.SearchReferenceSetsResponse
        return self._run_search_request(
            search_request, "referencesets", response_class)
Exemplo n.º 3
0
    def search_reference_sets(self,
                              accession=None,
                              md5checksum=None,
                              assembly_id=None):
        """
        Builds a SearchReferenceSetsRequest from the optional filters and
        runs it against the "referencesets" endpoint.

        :param str accession: If not null, restrict results to reference
            sets whose `accession` is exactly this string (case-sensitive).
        :param str md5checksum: If not null, restrict results to reference
            sets whose `md5checksum` is exactly this string
            (case-sensitive). See
            :class:`ga4gh.protocol.ReferenceSet::md5checksum` for details.
        :param str assembly_id: If not null, restrict results to reference
            sets whose `assembly_id` is exactly this string
            (case-sensitive).
        :return: An iterator over the :class:`ga4gh.protocol.ReferenceSet`
            objects defined by the query parameters.
        """
        req = protocol.SearchReferenceSetsRequest()
        req.accession = pb.string(accession)
        req.md5checksum = pb.string(md5checksum)
        req.assembly_id = pb.string(assembly_id)
        # Page size comes from this client's configuration.
        req.page_size = pb.int(self._page_size)
        return self._run_search_request(req, "referencesets",
                                        protocol.SearchReferenceSetsResponse)
Exemplo n.º 4
0
 def _gaFeatureForFeatureDbRecord(self, feature):
     """
     Converts one feature record into a GA4GH protocol.Feature message.

     :param feature: The DB row representing a feature. It is accessed
         via ``feature[...]`` and ``feature.get(...)``; its 'child_ids'
         and 'attributes' entries are parsed as JSON.
     :return: the corresponding GA4GH protocol.Feature object
     """
     gaRecord = protocol.Feature()
     gaRecord.id = self.getCompoundIdForFeatureId(feature['id'])
     # No (or falsy) parent id -> empty string in the message.
     if not feature.get('parent_id'):
         gaRecord.parent_id = ""
     else:
         gaRecord.parent_id = self.getCompoundIdForFeatureId(
             feature['parent_id'])
     gaRecord.feature_set_id = self.getId()
     gaRecord.reference_name = pb.string(feature.get('reference_name'))
     gaRecord.start = pb.int(feature.get('start'))
     gaRecord.end = pb.int(feature.get('end'))
     gaRecord.name = pb.string(feature.get('name'))
     # default to positive strand unless the record says '-'
     onNegativeStrand = feature.get('strand', '') == '-'
     gaRecord.strand = (
         protocol.NEG_STRAND if onNegativeStrand else protocol.POS_STRAND)
     rawChildIds = json.loads(feature['child_ids'])
     gaRecord.child_ids.extend(
         [self.getCompoundIdForFeatureId(rawId) for rawId in rawChildIds])
     gaRecord.feature_type.CopyFrom(
         self._ontology.getGaTermByName(feature['type']))
     attributes = json.loads(feature['attributes'])
     # TODO: Identify which values are ExternalIdentifiers and OntologyTerms
     for attrName in attributes:
         for attrValue in attributes[attrName]:
             # Look up vals[attrName] only when a value is being added.
             slot = gaRecord.attributes.vals[attrName].values.add()
             slot.string_value = attrValue
     # Use the first 'gene_name' attribute value as the gene symbol.
     if 'gene_name' in attributes:
         geneNames = attributes['gene_name']
         if len(geneNames) > 0:
             gaRecord.gene_symbol = pb.string(geneNames[0])
     return gaRecord
Exemplo n.º 5
0
 def toProtocolElement(self):
     """
     Returns the GA4GH protocol.Dataset message for this dataset,
     with one encoded info entry per key reported by getInfo().
     """
     gaDataset = protocol.Dataset()
     gaDataset.id = self.getId()
     gaDataset.name = pb.string(self.getLocalId())
     gaDataset.description = pb.string(self.getDescription())
     for infoKey in self.getInfo():
         # Keys come from getInfo(); the values are read from self._info.
         infoValues = gaDataset.info[infoKey].values
         infoValues.extend(_encodeValue(self._info[infoKey]))
     return gaDataset
Exemplo n.º 6
0
 def toProtocolElement(self):
     """
     Builds and returns the protocol.ReferenceSet message populated from
     this ReferenceSet's getters.
     """
     gaReferenceSet = protocol.ReferenceSet()
     gaReferenceSet.assembly_id = pb.string(self.getAssemblyId())
     gaReferenceSet.description = pb.string(self.getDescription())
     gaReferenceSet.id = self.getId()
     gaReferenceSet.is_derived = self.getIsDerived()
     gaReferenceSet.md5checksum = self.getMd5Checksum()
     gaReferenceSet.ncbi_taxon_id = pb.int(self.getNcbiTaxonId())
     accessions = self.getSourceAccessions()
     gaReferenceSet.source_accessions.extend(accessions)
     gaReferenceSet.source_uri = pb.string(self.getSourceUri())
     # The protocol-level name is this object's local id.
     gaReferenceSet.name = self.getLocalId()
     return gaReferenceSet
Exemplo n.º 7
0
 def toProtocolElement(self):
     """
     Returns this ReferenceSet as a GA4GH protocol.ReferenceSet message.
     """
     message = protocol.ReferenceSet()
     message.assembly_id = pb.string(self.getAssemblyId())
     message.description = pb.string(self.getDescription())
     message.id = self.getId()
     message.is_derived = self.getIsDerived()
     message.md5checksum = self.getMd5Checksum()
     message.ncbi_taxon_id = pb.int(self.getNcbiTaxonId())
     sourceAccessions = self.getSourceAccessions()
     message.source_accessions.extend(sourceAccessions)
     message.source_uri = pb.string(self.getSourceUri())
     # name is filled from the local id.
     message.name = self.getLocalId()
     return message
Exemplo n.º 8
0
    def search_reads(self,
                     read_group_ids,
                     reference_id=None,
                     start=None,
                     end=None):
        """
        Searches for Reads in the given read groups, optionally restricted
        to a reference and a coordinate window.

        :param read_group_ids: The IDs of the
            :class:`ga4gh.protocol.ReadGroup` objects of interest. Every
            element is appended to the request's ``read_group_ids`` field.
        :param str reference_id: The name of the
            :class:`ga4gh.protocol.Reference` we wish to return reads
            mapped to.
        :param int start: The start position (0-based) of this query. If a
            reference is specified, this defaults to 0. Genomic positions
            are non-negative integers less than reference length. Requests
            spanning the join of circular genomes are represented as two
            requests one on each side of the join (position 0).
        :param int end: The end position (0-based, exclusive) of this
            query. If a reference is specified, this defaults to the
            reference's length.
        :return: An iterator over the
            :class:`ga4gh.protocol.ReadAlignment` objects defined by
            the query parameters.
        :rtype: iter
        """
        reads_request = protocol.SearchReadsRequest()
        reads_request.read_group_ids.extend(read_group_ids)
        reads_request.reference_id = pb.string(reference_id)
        reads_request.start = pb.int(start)
        reads_request.end = pb.int(end)
        reads_request.page_size = pb.int(self._page_size)
        return self._run_search_request(reads_request, "reads",
                                        protocol.SearchReadsResponse)
Exemplo n.º 9
0
 def testToProtocolElement(self):
     """
     Round-trips a valid Individual through populateFromJson and
     toProtocolElement, then checks that malformed JSON is rejected.
     """
     dataset = datasets.Dataset('dataset1')
     sexTerm = protocol.OntologyTerm()
     sexTerm.term = "male genotypic sex"
     sexTerm.id = "PATO:0020001"
     sexTerm.source_name = "PATO"
     sexTerm.source_version = pb.string("2015-11-18")
     # Write out a valid input
     print(protocol.toJsonDict(sexTerm))
     expected = protocol.Individual(name="test",
                                    created="2016-05-19T21:00:19Z",
                                    updated="2016-05-19T21:00:19Z",
                                    sex=sexTerm)
     expected.info['test'].values.add().string_value = 'test-info'
     # pass through protocol creation
     individual = bioMetadata.Individual(dataset, "test")
     individual.populateFromJson(protocol.toJson(expected))
     actual = individual.toProtocolElement()
     # The timestamps must survive the round trip
     self.assertEqual(actual.created, expected.created)
     self.assertEqual(actual.updated, expected.updated)
     # Invalid input
     malformedJson = '{"bad:", "json"}'
     individual = bioMetadata.Individual(dataset, "test")
     # Should fail
     with self.assertRaises(exceptions.InvalidJsonException):
         individual.populateFromJson(malformedJson)
Exemplo n.º 10
0
 def testToProtocolElement(self):
     """
     Checks that an Individual populated from valid JSON keeps its
     created/updated values, and that invalid JSON raises
     InvalidJsonException.
     """
     dataset = datasets.Dataset('dataset1')
     ontologyTerm = protocol.OntologyTerm()
     ontologyTerm.term = "male genotypic sex"
     ontologyTerm.id = "PATO:0020001"
     ontologyTerm.source_name = "PATO"
     ontologyTerm.source_version = pb.string("2015-11-18")
     # Write out a valid input
     print(protocol.toJsonDict(ontologyTerm))
     reference = protocol.Individual(
         name="test",
         created="2016-05-19T21:00:19Z",
         updated="2016-05-19T21:00:19Z",
         sex=ontologyTerm)
     reference.info['test'].values.add().string_value = 'test-info'
     # pass through protocol creation
     individual = bioMetadata.Individual(
         dataset, "test")
     individual.populateFromJson(protocol.toJson(reference))
     converted = individual.toProtocolElement()
     # The timestamps must match the input message
     self.assertEqual(converted.created, reference.created)
     self.assertEqual(converted.updated, reference.updated)
     # Invalid input
     brokenJson = '{"bad:", "json"}'
     individual = bioMetadata.Individual(dataset, "test")
     # Should fail
     with self.assertRaises(exceptions.InvalidJsonException):
         individual.populateFromJson(brokenJson)
Exemplo n.º 11
0
    def searchReads(self,
                    readGroupIds,
                    referenceId=None,
                    start=None,
                    end=None):
        """
        Searches for Reads in the given read groups, optionally restricted
        to a reference and a coordinate window.

        :param readGroupIds: The IDs of the
            :class:`ga4gh.protocol.ReadGroup` objects of interest. Every
            element is appended to the request's ``read_group_ids`` field.
        :param str referenceId: The name of the
            :class:`ga4gh.protocol.Reference` we wish to return reads
            mapped to.
        :param int start: The start position (0-based) of this query. If a
            reference is specified, this defaults to 0. Genomic positions
            are non-negative integers less than reference length. Requests
            spanning the join of circular genomes are represented as two
            requests one on each side of the join (position 0).
        :param int end: The end position (0-based, exclusive) of this
            query. If a reference is specified, this defaults to the
            reference's length.
        :return: An iterator over the
            :class:`ga4gh.protocol.ReadAlignment` objects defined by
            the query parameters.
        :rtype: iter
        """
        readsRequest = protocol.SearchReadsRequest()
        readsRequest.read_group_ids.extend(readGroupIds)
        readsRequest.reference_id = pb.string(referenceId)
        readsRequest.start = pb.int(start)
        readsRequest.end = pb.int(end)
        readsRequest.page_size = pb.int(self._pageSize)
        return self._runSearchRequest(readsRequest, "reads",
                                      protocol.SearchReadsResponse)
Exemplo n.º 12
0
 def getSerializedResponse(self):
     """
     Stores the current next-page token on the response message built by
     this SearchResponseBuilder and returns the message rendered by
     toJson.
     """
     pageToken = pb.string(self._nextPageToken)
     self._protoObject.next_page_token = pageToken
     return toJson(self._protoObject)
Exemplo n.º 13
0
 def getSerializedResponse(self):
     """
     Returns the toJson rendering of the SearchResponse built by this
     SearchResponseBuilder, after recording the next-page token on it.
     """
     nextToken = pb.string(self._nextPageToken)
     self._protoObject.next_page_token = nextToken
     return toJson(self._protoObject)
Exemplo n.º 14
0
    def searchReadGroupSets(
            self, datasetId, name=None, bioSampleId=None):
        """
        Searches the specified Dataset for ReadGroupSets matching the
        given optional filters.

        :param str datasetId: The ID of the dataset to search; assigned
            to the request's ``dataset_id`` as-is.
        :param str name: Only ReadGroupSets matching the specified name
            will be returned.
        :param str bioSampleId: Only ReadGroups matching the specified
            bioSample will be included in the response.
        :return: An iterator over the :class:`ga4gh.protocol.ReadGroupSet`
            objects defined by the query parameters.
        :rtype: iter
        """
        searchRequest = protocol.SearchReadGroupSetsRequest()
        searchRequest.dataset_id = datasetId
        searchRequest.name = pb.string(name)
        searchRequest.bio_sample_id = pb.string(bioSampleId)
        searchRequest.page_size = pb.int(self._pageSize)
        return self._runSearchRequest(
            searchRequest, "readgroupsets",
            protocol.SearchReadGroupSetsResponse)
Exemplo n.º 15
0
    def searchBioSamples(
            self, datasetId, name=None, individualId=None):
        """
        Searches the given dataset for BioSamples matching the optional
        filters.

        :param str datasetId: The dataset to search within.
        :param str name: Only BioSamples matching the specified name will
            be returned.
        :param str individualId: Only BioSamples matching this
            id will be returned.
        :return: An iterator over the :class:`ga4gh.protocol.BioSample`
            objects defined by the query parameters.
        """
        searchRequest = protocol.SearchBioSamplesRequest()
        searchRequest.dataset_id = datasetId
        searchRequest.name = pb.string(name)
        searchRequest.individual_id = pb.string(individualId)
        searchRequest.page_size = pb.int(self._pageSize)
        return self._runSearchRequest(
            searchRequest, "biosamples",
            protocol.SearchBioSamplesResponse)
Exemplo n.º 16
0
    def searchReadGroupSets(self,
                            datasetId,
                            name=None,
                            bioSampleId=None):
        """
        Builds a SearchReadGroupSetsRequest for the specified Dataset and
        runs it against the "readgroupsets" endpoint.

        :param str datasetId: The ID of the dataset to search; copied
            unchanged into the request's ``dataset_id``.
        :param str name: Only ReadGroupSets matching the specified name
            will be returned.
        :param str bioSampleId: Only ReadGroups matching the specified
            bioSample will be included in the response.
        :return: An iterator over the :class:`ga4gh.protocol.ReadGroupSet`
            objects defined by the query parameters.
        :rtype: iter
        """
        req = protocol.SearchReadGroupSetsRequest()
        req.dataset_id = datasetId
        req.name = pb.string(name)
        req.bio_sample_id = pb.string(bioSampleId)
        req.page_size = pb.int(self._pageSize)
        return self._runSearchRequest(
            req, "readgroupsets", protocol.SearchReadGroupSetsResponse)
Exemplo n.º 17
0
    def search_bio_samples(
            self, dataset_id, name=None, individual_id=None):
        """
        Searches the given dataset for BioSamples matching the optional
        filters.

        :param str dataset_id: The dataset to search within.
        :param str name: Only BioSamples matching the specified name will
            be returned.
        :param str individual_id: Only BioSamples matching this
            id will be returned.
        :return: An iterator over the :class:`ga4gh.protocol.BioSample`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchBioSamplesRequest()
        search_request.dataset_id = dataset_id
        search_request.name = pb.string(name)
        search_request.individual_id = pb.string(individual_id)
        search_request.page_size = pb.int(self._page_size)
        return self._run_search_request(
            search_request, "biosamples",
            protocol.SearchBioSamplesResponse)
Exemplo n.º 18
0
    def search_call_sets(
            self, variant_set_id, name=None, bio_sample_id=None):
        """
        Searches the specified VariantSet for CallSets matching the
        optional filters.

        :param str variant_set_id: Find callsets belonging to the
            provided variant set.
        :param str name: Only CallSets matching the specified name will
            be returned.
        :param str bio_sample_id: Only CallSets matching this id will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.CallSet`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchCallSetsRequest()
        search_request.variant_set_id = variant_set_id
        search_request.name = pb.string(name)
        search_request.bio_sample_id = pb.string(bio_sample_id)
        search_request.page_size = pb.int(self._page_size)
        return self._run_search_request(
            search_request, "callsets", protocol.SearchCallSetsResponse)
Exemplo n.º 19
0
    def searchCallSets(
            self, variantSetId, name=None, bioSampleId=None):
        """
        Searches the specified VariantSet for CallSets matching the
        optional filters.

        :param str variantSetId: Find callsets belonging to the
            provided variant set.
        :param str name: Only CallSets matching the specified name will
            be returned.
        :param str bioSampleId: Only CallSets matching this id will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.CallSet`
            objects defined by the query parameters.
        """
        searchRequest = protocol.SearchCallSetsRequest()
        searchRequest.variant_set_id = variantSetId
        searchRequest.name = pb.string(name)
        searchRequest.bio_sample_id = pb.string(bioSampleId)
        searchRequest.page_size = pb.int(self._pageSize)
        return self._runSearchRequest(
            searchRequest, "callsets", protocol.SearchCallSetsResponse)
Exemplo n.º 20
0
    def searchReferences(self,
                         referenceSetId,
                         accession=None,
                         md5checksum=None):
        """
        Searches the specified ReferenceSet for References matching the
        optional filters.

        :param str referenceSetId: The ReferenceSet to search.
        :param str accession: If not None, return the references for which
            the `accession` matches this string (case-sensitive, exact
            match).
        :param str md5checksum: If not None, return the references for
            which the `md5checksum` matches this string (case-sensitive,
            exact match).
        :return: An iterator over the :class:`ga4gh.protocol.Reference`
            objects defined by the query parameters.
        """
        searchRequest = protocol.SearchReferencesRequest()
        searchRequest.reference_set_id = referenceSetId
        searchRequest.accession = pb.string(accession)
        searchRequest.md5checksum = pb.string(md5checksum)
        searchRequest.page_size = pb.int(self._pageSize)
        return self._runSearchRequest(searchRequest, "references",
                                      protocol.SearchReferencesResponse)
Exemplo n.º 21
0
    def search_variants(
            self, variant_set_id, start=None, end=None, reference_name=None,
            call_set_ids=None):
        """
        Searches the specified VariantSet for Variants overlapping the
        given window.

        :param str variant_set_id: The ID of the
            :class:`ga4gh.protocol.VariantSet` of interest.
        :param int start: Required. The beginning of the window (0-based,
            inclusive) for which overlapping variants should be returned.
            Genomic positions are non-negative integers less than reference
            length. Requests spanning the join of circular genomes are
            represented as two requests one on each side of the join
            (position 0).
        :param int end: Required. The end of the window (0-based, exclusive)
            for which overlapping variants should be returned.
        :param str reference_name: The name of the
            :class:`ga4gh.protocol.Reference` we wish to return variants
            from.
        :param list call_set_ids: Only return variant calls which belong to
            call sets with these IDs. If an empty array, returns variants
            without any call objects. If null, returns all variant calls.

        :return: An iterator over the :class:`ga4gh.protocol.Variant`
            objects defined by the query parameters.
        :rtype: iter
        """
        search_request = protocol.SearchVariantsRequest()
        search_request.reference_name = pb.string(reference_name)
        search_request.start = pb.int(start)
        search_request.end = pb.int(end)
        search_request.variant_set_id = variant_set_id
        # NOTE(review): the id list is passed through pb.string before
        # extend(); presumably this maps None to an empty value - confirm
        # against the pb helper.
        requested_call_sets = pb.string(call_set_ids)
        search_request.call_set_ids.extend(requested_call_sets)
        search_request.page_size = pb.int(self._page_size)
        return self._run_search_request(
            search_request, "variants", protocol.SearchVariantsResponse)
Exemplo n.º 22
0
    def search_references(self,
                          reference_set_id,
                          accession=None,
                          md5checksum=None):
        """
        Searches the specified ReferenceSet for References matching the
        optional filters.

        :param str reference_set_id: The ReferenceSet to search.
        :param str accession: If not None, return the references for which
            the `accession` matches this string (case-sensitive, exact
            match).
        :param str md5checksum: If not None, return the references for
            which the `md5checksum` matches this string (case-sensitive,
            exact match).
        :return: An iterator over the :class:`ga4gh.protocol.Reference`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchReferencesRequest()
        search_request.reference_set_id = reference_set_id
        search_request.accession = pb.string(accession)
        search_request.md5checksum = pb.string(md5checksum)
        search_request.page_size = pb.int(self._page_size)
        return self._run_search_request(search_request, "references",
                                        protocol.SearchReferencesResponse)
Exemplo n.º 23
0
 def toProtocolElement(self):
     """
     Builds the protocol.FeatureSet message for this FeatureSet and
     returns it.
     """
     message = protocol.FeatureSet()
     message.id = self.getId()
     # The owning dataset is this object's parent container.
     message.dataset_id = self.getParentContainer().getId()
     message.reference_set_id = pb.string(self._referenceSet.getId())
     message.name = self._name
     message.source_uri = self._sourceUri
     for infoKey in self._info:
         infoValues = self._info[infoKey]
         message.info[infoKey].values.extend(infoValues)
     return message
Exemplo n.º 24
0
 def toProtocolElement(self):
     """
     Builds and returns the protocol.ReadGroup message for this
     ReadGroup.
     """
     # TODO this is very incomplete, but we don't have the
     # implementation to fill out the rest of the fields currently
     gaReadGroup = protocol.ReadGroup()
     gaReadGroup.id = self.getId()
     gaReadGroup.created = self._creationTime
     gaReadGroup.updated = self._updateTime
     # The dataset is two container levels above this read group.
     owningDataset = self.getParentContainer().getParentContainer()
     gaReadGroup.dataset_id = owningDataset.getId()
     gaReadGroup.name = self.getLocalId()
     gaReadGroup.predicted_insert_size = pb.int(
         self.getPredictedInsertSize())
     refSet = self._parentContainer.getReferenceSet()
     gaReadGroup.sample_id = pb.string(self.getSampleId())
     # reference_set_id is left unset when there is no reference set.
     if refSet is not None:
         gaReadGroup.reference_set_id = refSet.getId()
     gaReadGroup.stats.CopyFrom(self.getStats())
     gaReadGroup.programs.extend(self.getPrograms())
     gaReadGroup.description = pb.string(self.getDescription())
     gaReadGroup.experiment.CopyFrom(self.getExperiment())
     return gaReadGroup
Exemplo n.º 25
0
 def toProtocolElement(self):
     """
     Returns this FeatureSet converted to its protocol.FeatureSet
     message.
     """
     featureSetMessage = protocol.FeatureSet()
     featureSetMessage.id = self.getId()
     # dataset_id is taken from the parent container.
     parentDataset = self.getParentContainer()
     featureSetMessage.dataset_id = parentDataset.getId()
     featureSetMessage.reference_set_id = pb.string(
         self._referenceSet.getId())
     featureSetMessage.name = self._name
     featureSetMessage.source_uri = self._sourceUri
     for infoKey in self._info:
         featureSetMessage.info[infoKey].values.extend(
             self._info[infoKey])
     return featureSetMessage
Exemplo n.º 26
0
 def toProtocolElement(self):
     """
     Builds and returns the protocol.ReadGroup message for this
     ReadGroup, including its sample name and bio sample id.
     """
     # TODO this is very incomplete, but we don't have the
     # implementation to fill out the rest of the fields currently
     gaReadGroup = protocol.ReadGroup()
     gaReadGroup.id = self.getId()
     gaReadGroup.created = self._creationTime
     gaReadGroup.updated = self._updateTime
     # The dataset is two container levels above this read group.
     owningDataset = self.getParentContainer().getParentContainer()
     gaReadGroup.dataset_id = owningDataset.getId()
     gaReadGroup.name = self.getLocalId()
     gaReadGroup.predicted_insert_size = pb.int(
         self.getPredictedInsertSize())
     refSet = self._parentContainer.getReferenceSet()
     gaReadGroup.sample_name = pb.string(self.getSampleName())
     gaReadGroup.bio_sample_id = pb.string(self.getBioSampleId())
     # reference_set_id is left unset when there is no reference set.
     if refSet is not None:
         gaReadGroup.reference_set_id = refSet.getId()
     gaReadGroup.stats.CopyFrom(self.getStats())
     gaReadGroup.programs.extend(self.getPrograms())
     gaReadGroup.description = pb.string(self.getDescription())
     gaReadGroup.experiment.CopyFrom(self.getExperiment())
     return gaReadGroup
Exemplo n.º 27
0
    def search_variants(self,
                        variant_set_id,
                        start=None,
                        end=None,
                        reference_name=None,
                        call_set_ids=None):
        """
        Builds a SearchVariantsRequest for the specified VariantSet and
        runs it against the "variants" endpoint.

        :param str variant_set_id: The ID of the
            :class:`ga4gh.protocol.VariantSet` of interest.
        :param int start: Required. The beginning of the window (0-based,
            inclusive) for which overlapping variants should be returned.
            Genomic positions are non-negative integers less than reference
            length. Requests spanning the join of circular genomes are
            represented as two requests one on each side of the join
            (position 0).
        :param int end: Required. The end of the window (0-based, exclusive)
            for which overlapping variants should be returned.
        :param str reference_name: The name of the
            :class:`ga4gh.protocol.Reference` we wish to return variants
            from.
        :param list call_set_ids: Only return variant calls which belong to
            call sets with these IDs. If an empty array, returns variants
            without any call objects. If null, returns all variant calls.

        :return: An iterator over the :class:`ga4gh.protocol.Variant`
            objects defined by the query parameters.
        :rtype: iter
        """
        req = protocol.SearchVariantsRequest()
        req.reference_name = pb.string(reference_name)
        req.start = pb.int(start)
        req.end = pb.int(end)
        req.variant_set_id = variant_set_id
        # NOTE(review): pb.string is applied to the whole id list before
        # extend(); presumably it turns None into an empty value - confirm
        # against the pb helper.
        call_set_filter = pb.string(call_set_ids)
        req.call_set_ids.extend(call_set_filter)
        req.page_size = pb.int(self._page_size)
        return self._run_search_request(req, "variants",
                                        protocol.SearchVariantsResponse)
Exemplo n.º 28
0
    def searchCallSets(self, variantSetId, name=None):
        """
        Searches the specified VariantSet for CallSets, optionally
        filtered by name.

        :param str variantSetId: The ID of the variant set to search;
            assigned to the request's ``variant_set_id`` as-is.
        :param str name: Only CallSets matching the specified name will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.CallSet`
            objects defined by the query parameters.
        """
        searchRequest = protocol.SearchCallSetsRequest()
        searchRequest.variant_set_id = variantSetId
        searchRequest.name = pb.string(name)
        searchRequest.page_size = pb.int(self._pageSize)
        return self._runSearchRequest(searchRequest, "callsets",
                                      protocol.SearchCallSetsResponse)
Exemplo n.º 29
0
    def search_individuals(self, dataset_id, name=None):
        """
        Searches the given dataset for Individuals, optionally filtered
        by name.

        :param str dataset_id: The dataset to search within.
        :param str name: Only Individuals matching the specified name will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.Individual`
            objects defined by the query parameters.
        """
        search_request = protocol.SearchIndividualsRequest()
        search_request.dataset_id = dataset_id
        search_request.name = pb.string(name)
        search_request.page_size = pb.int(self._page_size)
        return self._run_search_request(
            search_request, "individuals",
            protocol.SearchIndividualsResponse)
Exemplo n.º 30
0
    def searchIndividuals(self, datasetId, name=None):
        """
        Searches the given dataset for Individuals, optionally filtered
        by name.

        :param str datasetId: The dataset to search within.
        :param str name: Only Individuals matching the specified name will
            be returned.
        :return: An iterator over the :class:`ga4gh.protocol.Individual`
            objects defined by the query parameters.
        """
        searchRequest = protocol.SearchIndividualsRequest()
        searchRequest.dataset_id = datasetId
        searchRequest.name = pb.string(name)
        searchRequest.page_size = pb.int(self._pageSize)
        return self._runSearchRequest(searchRequest, "individuals",
                                      protocol.SearchIndividualsResponse)
Exemplo n.º 31
0
 def getExperiment(self):
     """
     Builds and returns the protocol.Experiment message describing this
     read group's Experiment.
     """
     gaExperiment = protocol.Experiment()
     gaExperiment.id = self.getExperimentId()
     gaExperiment.instrument_model = pb.string(
         self.getInstrumentModel())
     gaExperiment.sequencing_center = pb.string(
         self.getSequencingCenter())
     gaExperiment.description = pb.string(
         self.getExperimentDescription())
     gaExperiment.library = pb.string(self.getLibrary())
     gaExperiment.platform_unit = pb.string(self.getPlatformUnit())
     # Both message timestamps are filled from self._iso8601.
     gaExperiment.message_create_time = self._iso8601
     gaExperiment.message_update_time = self._iso8601
     gaExperiment.run_time = pb.string(self.getRunTime())
     return gaExperiment
Exemplo n.º 32
0
    def getGaTermByName(self, name):
        """
        Builds a GA4GH OntologyTerm object for the given term name.

        The term id is the first id returned by ``getTermIds(name)``, or
        the empty string when no ids are returned.

        :param name: name of the ontology term, ex. "gene".
        :return: GA4GH OntologyTerm object.
        """
        matchingIds = self.getTermIds(name)
        # TODO what is the correct value when we have no mapping??
        # TODO add logging for missed term translation.
        # TODO what is the correct behaviour here when we have multiple
        # IDs matching a given name?
        firstId = matchingIds[0] if len(matchingIds) != 0 else ""
        gaTerm = protocol.OntologyTerm()
        gaTerm.term = name
        gaTerm.id = firstId
        gaTerm.source_name = self._sourceName
        gaTerm.source_version = pb.string(self._sourceVersion)
        return gaTerm
Exemplo n.º 33
0
 def toProtocolElement(self):
     """
     Returns the protocol.Dataset message for this dataset, carrying
     its id, local id (as name) and description.
     """
     gaDataset = protocol.Dataset()
     gaDataset.id = self.getId()
     localId = self.getLocalId()
     gaDataset.name = pb.string(localId)
     description = self.getDescription()
     gaDataset.description = pb.string(description)
     return gaDataset
Exemplo n.º 34
0
 def toProtocolElement(self):
     """
     Builds the protocol.Dataset message for this dataset from its id,
     local id and description, and returns it.
     """
     message = protocol.Dataset()
     message.id = self.getId()
     datasetName = self.getLocalId()
     message.name = pb.string(datasetName)
     datasetDescription = self.getDescription()
     message.description = pb.string(datasetDescription)
     return message