def _gaFeatureForFeatureDbRecord(self, feature): """ :param feature: The DB Row representing a feature :return: the corresponding GA4GH protocol.Feature object """ gaFeature = protocol.Feature() gaFeature.id = self.getCompoundIdForFeatureId(feature['id']) if feature.get('parent_id'): gaFeature.parent_id = self.getCompoundIdForFeatureId( feature['parent_id']) else: gaFeature.parent_id = "" gaFeature.feature_set_id = self.getId() gaFeature.reference_name = pb.string(feature.get('reference_name')) gaFeature.start = pb.int(feature.get('start')) gaFeature.end = pb.int(feature.get('end')) gaFeature.name = pb.string(feature.get('name')) if feature.get('strand', '') == '-': gaFeature.strand = protocol.NEG_STRAND else: # default to positive strand gaFeature.strand = protocol.POS_STRAND gaFeature.child_ids.extend( map(self.getCompoundIdForFeatureId, json.loads(feature['child_ids']))) gaFeature.feature_type.CopyFrom( self._ontology.getGaTermByName(feature['type'])) attributes = json.loads(feature['attributes']) # TODO: Identify which values are ExternalIdentifiers and OntologyTerms for key in attributes: for v in attributes[key]: gaFeature.attributes.attr[key].values.add().string_value = v if 'gene_name' in attributes and len(attributes['gene_name']) > 0: gaFeature.gene_symbol = pb.string(attributes['gene_name'][0]) return gaFeature
def _getFeatureById(self, featureId): """ find a feature and return ga4gh representation, use 'native' id as featureId """ featureRef = rdflib.URIRef(featureId) featureDetails = self._detailTuples([featureRef]) feature = {} for detail in featureDetails: feature[detail['predicate']] = [] for detail in featureDetails: feature[detail['predicate']].append(detail['object']) pbFeature = protocol.Feature() term = protocol.OntologyTerm() # Schema for feature only supports one type of `type` # here we default to first OBO defined for featureType in sorted(feature[TYPE]): if "obolibrary" in featureType: term.term = self._featureTypeLabel(featureType) term.term_id = featureType pbFeature.feature_type.MergeFrom(term) break pbFeature.id = featureId # Schema for feature only supports one type of `name` `symbol` # here we default to shortest for symbol and longest for name feature[LABEL].sort(key=len) pbFeature.gene_symbol = feature[LABEL][0] pbFeature.name = feature[LABEL][-1] pbFeature.attributes.MergeFrom(protocol.Attributes()) for key in feature: for val in sorted(feature[key]): pbFeature.attributes.attr[key].values.add().string_value = val if featureId in self._locationMap: location = self._locationMap[featureId] pbFeature.reference_name = location["chromosome"] pbFeature.start = location["begin"] pbFeature.end = location["end"] return pbFeature
def _generateSimulatedFeature(self, randomNumberGenerator): feature = protocol.Feature() feature.feature_set_id = self.getId() feature.start = randomNumberGenerator.randint(1000, 2000) feature.end = feature.start + randomNumberGenerator.randint(1, 100) feature.feature_type.CopyFrom( self._getRandomfeatureType(randomNumberGenerator)) references = ["chr1", "chr2", "chrX"] feature.reference_name = randomNumberGenerator.choice(references) strands = [protocol.POS_STRAND, protocol.NEG_STRAND] feature.strand = randomNumberGenerator.choice(strands) attributes = { "gene_name": "Frances", "gene_type": "mRNA", "gene_status": "UNKNOWN" } for key, value in attributes.items(): feature.attributes.attr[key].values.add().string_value = value return feature