def getDataModelInstance(self, localId, dataPath):
    """
    Returns the data model object for the specified localId and dataPath.

    If self._isAnnotated() is false, a plain HtslibVariantSet is returned.
    Otherwise a variant set named "vs" is built from self._dataPath, kept
    on self._variantSet, and an HtslibVariantAnnotationSet that refers to
    it is returned.
    """
    if not self._isAnnotated():
        return variants.HtslibVariantSet(
            self._dataset, localId, dataPath, None)
    # Return a VA set if it is one. The annotation set needs a variant
    # set to refer to, so that is created (and remembered) first.
    self._variantSet = variants.HtslibVariantSet(
        self._dataset, "vs", self._dataPath, None)
    return variants.HtslibVariantAnnotationSet(
        self._dataset, localId, dataPath, self._datarepo,
        self._variantSet)
def testThousandGenomesAnnotation(self):
    """
    Checks that isAnnotated returns True for the 1kg.3.annotations
    test data directory.
    """
    annotationsPath = (
        "tests/data/datasets/dataset1/variants/1kg.3.annotations")
    annotationSet = variants.HtslibVariantAnnotationSet(
        self._dataset, "vas", annotationsPath, self._backend,
        self._variantSet)
    # The same path is used both to construct the set and to query it.
    self.assertEqual(annotationSet.isAnnotated(annotationsPath), True)
def setUp(self):
    """
    Creates the backend, dataset, variant set and variant annotation set
    (built from the WASH7P_annotation test data) used by the tests.
    """
    self._variantSetName = "testVariantSet"

    backend = datarepo.FileSystemDataRepository("tests/data")
    self._backend = backend

    dataset = datasets.AbstractDataset(backend)
    self._dataset = dataset

    variantSet = variants.AbstractVariantSet(
        dataset, self._variantSetName)
    self._variantSet = variantSet

    annotationPath = (
        "tests/data/datasets/dataset1/variants/WASH7P_annotation")
    self._variantAnnotationSet = variants.HtslibVariantAnnotationSet(
        dataset, "vas", annotationPath, backend, variantSet)
def _readVariantAnnotationSetTable(self, cursor):
    """
    Reads every row of the VariantAnnotationSet table through the
    specified cursor and adds the resulting variant annotation set to
    the variant set the row refers to.
    """
    # Use sqlite3.Row so that columns can be looked up by name.
    cursor.row_factory = sqlite3.Row
    cursor.execute("SELECT * FROM VariantAnnotationSet;")
    for record in cursor:
        parentVariantSet = self.getVariantSet(record[b'variantSetId'])
        rowOntology = self.getOntology(record[b'ontologyId'])
        annotationSet = variants.HtslibVariantAnnotationSet(
            parentVariantSet, record[b'name'])
        annotationSet.setOntology(rowOntology)
        annotationSet.populateFromRow(record)
        # The ID derived by the object must agree with the stored one.
        assert annotationSet.getId() == record[b'id']
        # Insert the variantAnnotationSet into the memory-based model.
        parentVariantSet.addVariantAnnotationSet(annotationSet)
def _createVariantAnnotationSet(self, vcfDir):
    """
    Builds a VariantAnnotationSet on top of a variant set populated
    from the specified directory of VCF files, and stores the objects
    created along the way on the instance.
    """
    self._variantSetName = "testVariantSet"

    # The repository is opened read-only; it is only consulted for the
    # ontology looked up at the end of this method.
    repo = datarepo.SqlDataRepository(paths.testDataRepo)
    self._repo = repo
    repo.open(datarepo.MODE_READ)

    self._dataset = datasets.Dataset("testDs")

    variantSet = variants.HtslibVariantSet(
        self._dataset, self._variantSetName)
    self._variantSet = variantSet
    variantSet.populateFromDirectory(vcfDir)

    annotationSet = variants.HtslibVariantAnnotationSet(
        variantSet, "testVAs")
    self._variantAnnotationSet = annotationSet
    annotationSet.setOntology(
        self._repo.getOntologyByName(paths.ontologyName))
def __init__(self, localId, dataDir, dataRepository):
    """
    Creates the dataset by scanning the variants, reads and features
    sub-directories of dataDir.

    localId is passed to the parent class constructor. dataDir is the
    directory holding the sub-directories named by self.variantsDirName,
    self.readsDirName and self.featuresDirName. dataRepository is handed
    unchanged to every object constructed here.

    os.listdir raises OSError if any of the three sub-directories is
    missing; that error is not handled here.
    """
    super(FileSystemDataset, self).__init__(localId)
    self._dataDir = dataDir
    self._setMetadata()

    # Variants: every sub-directory is a variant set named after the
    # directory. Loop variables deliberately do not reuse the name of
    # the localId parameter.
    variantSetDir = os.path.join(dataDir, self.variantsDirName)
    for variantSetName in os.listdir(variantSetDir):
        relativePath = os.path.join(variantSetDir, variantSetName)
        if not os.path.isdir(relativePath):
            continue
        variantSet = variants.HtslibVariantSet(
            self, variantSetName, relativePath, dataRepository)
        self.addVariantSet(variantSet)
        # Variant annotation sets: only checked for the variant set
        # created in this iteration, so variantSet is always bound and
        # always belongs to relativePath.
        if variantSet.isAnnotated(relativePath):
            variantAnnotationSet = variants.HtslibVariantAnnotationSet(
                self, variantSetName, relativePath, dataRepository,
                variantSet)
            self.addVariantAnnotationSet(variantAnnotationSet)

    # Reads: every *.bam file is a read group set named after the file
    # name with its extension removed.
    readGroupSetDir = os.path.join(dataDir, self.readsDirName)
    for filename in os.listdir(readGroupSetDir):
        if not fnmatch.fnmatch(filename, '*.bam'):
            continue
        readGroupSetName, _ = os.path.splitext(filename)
        bamPath = os.path.join(readGroupSetDir, filename)
        readGroupSet = reads.HtslibReadGroupSet(
            self, readGroupSetName, bamPath, dataRepository)
        self.addReadGroupSet(readGroupSet)

    # Sequence annotations: every *.db file is a feature set named
    # after the file name with its extension removed.
    featureSetDir = os.path.join(dataDir, self.featuresDirName)
    for filename in os.listdir(featureSetDir):
        if not fnmatch.fnmatch(filename, '*.db'):
            continue
        featureSetName, _ = os.path.splitext(filename)
        fullPath = os.path.join(featureSetDir, filename)
        featureSet = sequenceAnnotations.Gff3DbFeatureSet(
            self, featureSetName, fullPath, dataRepository)
        self.addFeatureSet(featureSet)