def testTopLevelIdsUnique(self):
    """A read group set and a variant set sharing the same local id in
    the same dataset must still be assigned distinct object ids."""
    localId = "b"
    parentDataset = datasets.Dataset("a")
    readGroupSet = reads.AbstractReadGroupSet(parentDataset, localId)
    variantSet = variants.AbstractVariantSet(parentDataset, localId)
    self.assertNotEqual(readGroupSet.getId(), variantSet.getId())
def testCreation(self):
    """Checks that a simulated variant annotation set links back to its
    parent variant set and yields well-formed annotations.

    Fix: replaced the deprecated ``assertEquals`` alias with
    ``assertEqual``.
    """
    dataset = datasets.Dataset('dataset1')
    referenceSet = references.SimulatedReferenceSet("srs1")
    localId = "variantAnnotationSetId"
    simulatedVariantSet = variants.SimulatedVariantSet(
        dataset, referenceSet, 'variantSet1',
        randomSeed=self.randomSeed, numCalls=self.numCalls,
        variantDensity=self.variantDensity)
    simulatedVariantAnnotationSet = variants.SimulatedVariantAnnotationSet(
        simulatedVariantSet, localId, self.randomSeed)
    annotations = simulatedVariantAnnotationSet.getVariantAnnotations(
        self.referenceName, self.startPosition, self.endPosition)
    self.assertEqual(
        simulatedVariantSet.toProtocolElement().id,
        simulatedVariantAnnotationSet.toProtocolElement().variant_set_id,
        "Variant Set ID should match the annotation's variant set ID")
    for variant, ann in annotations:
        # A lossless strptime/strftime round trip proves the timestamp
        # is laid out exactly as "%Y-%m-%dT%H:%M:%S.%fZ" (ISO 8601).
        self.assertEqual(
            datetime.datetime.strptime(
                ann.created, "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
                    "%Y-%m-%dT%H:%M:%S.%fZ"),
            ann.created,
            "Expect time format to be in ISO8601")
        self.assertEqual(variant.id, ann.variant_id)
def testToProtocolElement(self):
    """Round-trips a valid Individual through populateFromJson and
    verifies that invalid JSON raises InvalidJsonException.

    Fix: removed a leftover debug ``print(protocol.toJsonDict(term))``
    that polluted the test output.
    """
    dataset = datasets.Dataset('dataset1')
    term = protocol.OntologyTerm()
    term.term = "male genotypic sex"
    term.id = "PATO:0020001"
    term.source_name = "PATO"
    term.source_version = pb.string("2015-11-18")
    # Write out a valid input
    validIndividual = protocol.Individual(
        name="test",
        created="2016-05-19T21:00:19Z",
        updated="2016-05-19T21:00:19Z",
        sex=term)
    validIndividual.info['test'].values.add().string_value = 'test-info'
    # pass through protocol creation
    individual = bioMetadata.Individual(dataset, "test")
    individual.populateFromJson(protocol.toJson(validIndividual))
    gaIndividual = individual.toProtocolElement()
    # Verify elements exist
    self.assertEqual(gaIndividual.created, validIndividual.created)
    self.assertEqual(gaIndividual.updated, validIndividual.updated)
    # Invalid input
    invalidIndividual = '{"bad:", "json"}'
    individual = bioMetadata.Individual(dataset, "test")
    # Should fail
    self.assertRaises(
        exceptions.InvalidJsonException,
        individual.populateFromJson,
        invalidIndividual)
def testGetDatasetByIndexBadIndex(self):
    """getDatasetByIndex must reject out-of-range and non-integer
    indexes, both on an empty repo and on one holding a dataset."""
    # Empty repository: index 0 is out of range; non-ints are TypeErrors.
    badCalls = [(IndexError, 0), (TypeError, None), (TypeError, "")]
    for expectedError, badIndex in badCalls:
        self.assertRaises(
            expectedError, self._dataRepo.getDatasetByIndex, badIndex)
    # With exactly one dataset present, index 1 is still out of range.
    self._dataRepo.addDataset(datasets.Dataset("ds"))
    self.assertRaises(IndexError, self._dataRepo.getDatasetByIndex, 1)
def __init__(self, variantSetId, baseDir):
    """Loads every compressed VCF under the data directory, recording
    each variant record and the reference names encountered."""
    self._dataset = datasets.Dataset("ds")
    super(VariantSetTest, self).__init__(variantSetId, baseDir)
    self._variantRecords = []
    self._reference_names = set()
    # Read in all the VCF files in datadir and store each variant.
    vcfPattern = os.path.join(self._dataPath, "*.vcf.gz")
    for vcfFile in glob.glob(vcfPattern):
        self._readVcf(vcfFile)
def testCreation(self):
    """Every read group in a freshly simulated read group set must be
    able to produce at least one read alignment."""
    simulatedReadGroupSet = reads.SimulatedReadGroupSet(
        datasets.Dataset('dataset1'),
        "readGroupSetId",
        references.SimulatedReferenceSet("srs1"))
    for group in simulatedReadGroupSet.getReadGroups():
        numAlignments = len(list(group.getReadAlignments()))
        self.assertGreater(numAlignments, 0)
def addDataset(self):
    """
    Adds a new dataset into this repo.
    """
    self._openRepo()
    # Build the dataset from the parsed command-line arguments.
    newDataset = datasets.Dataset(self._args.datasetName)
    newDataset.setDescription(self._args.description)
    newDataset.setInfo(json.loads(self._args.info))
    self._updateRepo(self._repo.insertDataset, newDataset)
def _readDatasetTable(self, cursor):
    """Loads every row of the Dataset table into the in-memory model."""
    cursor.row_factory = sqlite3.Row
    rows = cursor.execute("SELECT * FROM Dataset;")
    for row in rows:
        dataset = datasets.Dataset(row[b'name'])
        dataset.populateFromRow(row)
        # Sanity check: the stored id must match the derived one.
        assert dataset.getId() == row[b"id"]
        # Insert the dataset into the memory-based object model.
        self.addDataset(dataset)
def __init__(self, localId, dataPath):
    """Opens the alignment file at dataPath and builds per-read-group
    bookkeeping for the test."""
    # Backend over an empty repository; used only as a container here.
    self._backend = backend.Backend(datarepo.AbstractDataRepository())
    self._referenceSet = None
    self._dataset = datasets.Dataset("ds")
    self._readGroupInfos = {}
    self._readGroupSetInfo = None
    # Open the SAM/BAM file and cache its reference information before
    # the superclass constructor consumes it.
    self._samFile = pysam.AlignmentFile(dataPath)
    self._readReferences()
    super(ReadGroupSetTest, self).__init__(localId, dataPath)
    self._readAlignmentInfo()
def _getSimulatedVariantSet(self):
    """Builds a simulated variant set using this test case's simulation
    parameters (seed, call count, density)."""
    return variants.SimulatedVariantSet(
        datasets.Dataset('dataset1'),
        references.SimulatedReferenceSet("srs1"),
        'variantSet1',
        randomSeed=self.randomSeed,
        numCalls=self.numCalls,
        variantDensity=self.variantDensity)
def testAddOneDataset(self):
    """After adding a single dataset, every lookup path (index, name,
    id) must find it."""
    name = "ds"
    added = datasets.Dataset(name)
    # The repository starts out empty.
    self.assertEqual(self._dataRepo.getNumDatasets(), 0)
    self.assertEqual(self._dataRepo.getDatasets(), [])
    self._dataRepo.addDataset(added)
    self.assertEqual(self._dataRepo.getNumDatasets(), 1)
    self.assertEqual(self._dataRepo.getDatasets(), [added])
    self.assertEqual(self._dataRepo.getDatasetByIndex(0), added)
    self.assertEqual(self._dataRepo.getDatasetByName(name), added)
    self.assertEqual(self._dataRepo.getDataset(added.getId()), added)
def testAddMultipleDatasets(self):
    """Two datasets must be retrievable by index, name, and id, in
    insertion order."""
    names = ["ds1", "ds2"]
    expected = [datasets.Dataset(name) for name in names]
    # The repository starts out empty.
    self.assertEqual(self._dataRepo.getNumDatasets(), 0)
    self.assertEqual(self._dataRepo.getDatasets(), [])
    for dataset in expected:
        self._dataRepo.addDataset(dataset)
    self.assertEqual(self._dataRepo.getNumDatasets(), 2)
    self.assertEqual(self._dataRepo.getDatasets(), expected)
    # Each dataset is reachable through all three lookup paths.
    for index, (name, dataset) in enumerate(zip(names, expected)):
        self.assertEqual(
            self._dataRepo.getDatasetByIndex(index), dataset)
        self.assertEqual(
            self._dataRepo.getDatasetByName(name), dataset)
        self.assertEqual(
            self._dataRepo.getDataset(dataset.getId()), dataset)
def getDataModelInstance(self, localId, dataPath):
    """Builds a variant set from the given directory and returns it, or
    — when the VCF carries annotations — returns its first variant
    annotation set with the sequence ontology attached."""
    variantSet = variants.HtslibVariantSet(
        datasets.Dataset("ds"), localId)
    variantSet.populateFromDirectory(dataPath)
    variantSet.setReferenceSet(references.AbstractReferenceSet("rs"))
    if not variantSet.isAnnotated():
        return variantSet
    sequenceOntology = ontologies.Ontology(paths.ontologyName)
    sequenceOntology.populateFromFile(paths.ontologyPath)
    annotationSet = variantSet.getVariantAnnotationSets()[0]
    annotationSet.setOntology(sequenceOntology)
    return annotationSet
def __init__(self, featureSetLocalName, dataPath):
    """
    :param localId: Name of the GFF3 resource corresponding to a pair
    of files, .db and .gff3
    :param dataPath: string representing full path to the .db file
    :return:
    """
    self._dataset = datasets.Dataset(_datasetName)
    # Read-only handle on the shared test repository; used to fetch the
    # sequence ontology below.
    self._repo = datarepo.SqlDataRepository(paths.testDataRepo)
    self._repo.open(datarepo.MODE_READ)
    self._ontology = self._repo.getOntologyByName(paths.ontologyName)
    self._referenceSet = references.AbstractReferenceSet("test_rs")
    featureSetLocalName = featureSetLocalName[:-3]  # remove '.db'
    # The expected-results fixture is keyed by the bare (extensionless)
    # feature set name.
    self._testData = _testDataForFeatureSetName[featureSetLocalName]
    super(FeatureSetTests, self).__init__(featureSetLocalName, dataPath)
def _createVariantAnnotationSet(self, vcfDir):
    """
    Creates a VariantAnnotationSet from the specified directory of
    VCF files.
    """
    self._variantSetName = "testVariantSet"
    # Read-only repository handle, needed to look up the ontology.
    self._repo = datarepo.SqlDataRepository(paths.testDataRepo)
    self._repo.open(datarepo.MODE_READ)
    self._dataset = datasets.Dataset("testDs")
    self._variantSet = variants.HtslibVariantSet(
        self._dataset, self._variantSetName)
    self._variantSet.populateFromDirectory(vcfDir)
    # The annotation set wraps the variant set built above.
    self._variantAnnotationSet = variants.HtslibVariantAnnotationSet(
        self._variantSet, "testVAs")
    self._variantAnnotationSet.setOntology(
        self._repo.getOntologyByName(paths.ontologyName))
def createRepo(self):
    """
    Creates the repository for all the data we've just downloaded.
    """
    repo = datarepo.SqlDataRepository(self.repoPath)
    repo.open("w")
    repo.initialise()
    # Reference set built from the downloaded FASTA subset.
    referenceSet = references.HtslibReferenceSet("GRCh37-subset")
    referenceSet.populateFromFile(self.fastaFilePath)
    referenceSet.setDescription("Subset of GRCh37 used for demonstration")
    referenceSet.setNcbiTaxonId(9606)  # 9606 = Homo sapiens
    for reference in referenceSet.getReferences():
        reference.setNcbiTaxonId(9606)
        reference.setSourceAccessions(
            self.accessions[reference.getName()] + ".subset")
    repo.insertReferenceSet(referenceSet)
    dataset = datasets.Dataset("1kg-p3-subset")
    dataset.setDescription("Sample data from 1000 Genomes phase 3")
    repo.insertDataset(dataset)
    # One variant set covering all downloaded VCFs.
    variantSet = variants.HtslibVariantSet(dataset, "mvncall")
    variantSet.setReferenceSet(referenceSet)
    dataUrls = [vcfFile for vcfFile, _ in self.vcfFilePaths]
    indexFiles = [indexFile for _, indexFile in self.vcfFilePaths]
    variantSet.populateFromFile(dataUrls, indexFiles)
    variantSet.checkConsistency()
    repo.insertVariantSet(variantSet)
    # One read group set per downloaded sample BAM.
    for sample, (bamFile, indexFile) in zip(
            self.samples, self.bamFilePaths):
        readGroupSet = reads.HtslibReadGroupSet(dataset, sample)
        readGroupSet.populateFromFile(bamFile, indexFile)
        readGroupSet.setReferenceSet(referenceSet)
        repo.insertReadGroupSet(readGroupSet)
    repo.commit()
    repo.close()
    # Reopen read-only just to print a summary of what was created.
    self.log("Finished creating the repository; summary:\n")
    repo.open("r")
    repo.printSummary()
def testToProtocolElement(self):
    """Round-trips a valid BioSample through populateFromJson and
    verifies that invalid JSON raises InvalidJsonException.

    Fix: the invalid-JSON branch previously instantiated
    ``bioMetadata.Individual``, so the failure path was exercised on the
    wrong class; it now uses ``bioMetadata.BioSample``.
    """
    dataset = datasets.Dataset('dataset1')
    # Write out a valid input
    validBioSample = protocol.BioSample(
        name="test",
        created="2016-05-19T21:00:19Z",
        updated="2016-05-19T21:00:19Z")
    validBioSample.info['test'].values.add().string_value = 'test-info'
    # pass through protocol creation
    bioSample = bioMetadata.BioSample(dataset, "test")
    bioSample.populateFromJson(protocol.toJson(validBioSample))
    gaBioSample = bioSample.toProtocolElement()
    # Verify elements exist
    self.assertEqual(gaBioSample.created, validBioSample.created)
    self.assertEqual(gaBioSample.updated, validBioSample.updated)
    # Invalid input
    invalidBioSample = '{"bad:", "json"}'
    bioSample = bioMetadata.BioSample(dataset, "test")
    # Should fail
    self.assertRaises(
        exceptions.InvalidJsonException,
        bioSample.populateFromJson,
        invalidBioSample)
def setUp(self):
    """Builds the fixture: one abstract feature set inside a throwaway
    dataset."""
    name = "testFeatureSet"
    parent = datasets.Dataset("test_ds")
    self._featureSetName = name
    self._dataset = parent
    self._featureSet = features.AbstractFeatureSet(parent, name)
def setUp(self):
    """Builds the fixture: one abstract variant set inside a throwaway
    dataset."""
    name = "testVariantSet"
    parent = datasets.Dataset("datasetId")
    self._variantSetName = name
    self._dataset = parent
    self._variantSet = variants.AbstractVariantSet(parent, name)
def setUp(self):
    """Points the fixture at the deliberately faulty VCF test data."""
    self.dataset = datasets.Dataset('dataset1')
    self.testDataDir = "tests/faultydata/variants"
def run(self):
    """Converts the raw BRCA1 compliance inputs into a populated
    SqlDataRepository: reference set, dataset, individuals/biosamples,
    read group sets, ontology, variant sets, feature sets (GFF3 and
    G2P), a phenotype association set, and an RNA quantification set."""
    if not os.path.exists(self.outputDirectory):
        os.makedirs(self.outputDirectory)
    self.repo.open("w")
    self.repo.initialise()
    # --- Reference set: copy, bgzip, and describe the BRCA1 FASTA. ---
    referenceFileName = "ref_brca1.fa"
    inputRef = os.path.join(self.inputDirectory, referenceFileName)
    outputRef = os.path.join(self.outputDirectory, referenceFileName)
    shutil.copy(inputRef, outputRef)
    fastaFilePath = os.path.join(
        self.outputDirectory, referenceFileName + '.gz')
    pysam.tabix_compress(outputRef, fastaFilePath)
    with open(os.path.join(
            self.inputDirectory, "ref_brca1.json")) as refMetadataFile:
        refMetadata = json.load(refMetadataFile)
    with open(os.path.join(
            self.inputDirectory,
            "referenceset_hg37.json")) as refMetadataFile:
        refSetMetadata = json.load(refMetadataFile)
    referenceSet = references.HtslibReferenceSet(
        refSetMetadata['assemblyId'])
    referenceSet.populateFromFile(fastaFilePath)
    referenceSet.setAssemblyId(refSetMetadata['assemblyId'])
    referenceSet.setDescription(refSetMetadata['description'])
    referenceSet.setNcbiTaxonId(refSetMetadata['ncbiTaxonId'])
    referenceSet.setIsDerived(refSetMetadata['isDerived'])
    referenceSet.setSourceUri(refSetMetadata['sourceUri'])
    referenceSet.setSourceAccessions(refSetMetadata['sourceAccessions'])
    for reference in referenceSet.getReferences():
        reference.setNcbiTaxonId(refMetadata['ncbiTaxonId'])
        reference.setSourceAccessions(refMetadata['sourceAccessions'])
    self.repo.insertReferenceSet(referenceSet)
    dataset = datasets.Dataset("brca1")
    # Some info is set, it isn't important what
    dataset.setInfo({"version": ga4gh.__version__})
    self.repo.insertDataset(dataset)
    # --- Individuals and biosamples, loaded from JSON fixtures. ---
    hg00096Individual = biodata.Individual(dataset, "HG00096")
    with open(os.path.join(
            self.inputDirectory,
            "individual_HG00096.json")) as jsonString:
        hg00096Individual.populateFromJson(jsonString.read())
    self.repo.insertIndividual(hg00096Individual)
    hg00096BioSample = biodata.BioSample(dataset, "HG00096")
    with open(os.path.join(
            self.inputDirectory,
            "bioSample_HG00096.json")) as jsonString:
        hg00096BioSample.populateFromJson(jsonString.read())
    hg00096BioSample.setIndividualId(hg00096Individual.getId())
    self.repo.insertBioSample(hg00096BioSample)
    hg00099Individual = biodata.Individual(dataset, "HG00099")
    with open(os.path.join(
            self.inputDirectory,
            "individual_HG00099.json")) as jsonString:
        hg00099Individual.populateFromJson(jsonString.read())
    self.repo.insertIndividual(hg00099Individual)
    hg00099BioSample = biodata.BioSample(dataset, "HG00099")
    with open(os.path.join(
            self.inputDirectory,
            "bioSample_HG00099.json")) as jsonString:
        hg00099BioSample.populateFromJson(jsonString.read())
    hg00099BioSample.setIndividualId(hg00099Individual.getId())
    self.repo.insertBioSample(hg00099BioSample)
    hg00101Individual = biodata.Individual(dataset, "HG00101")
    with open(os.path.join(
            self.inputDirectory,
            "individual_HG00101.json")) as jsonString:
        hg00101Individual.populateFromJson(jsonString.read())
    self.repo.insertIndividual(hg00101Individual)
    hg00101BioSample = biodata.BioSample(dataset, "HG00101")
    with open(os.path.join(
            self.inputDirectory,
            "bioSample_HG00101.json")) as jsonString:
        hg00101BioSample.populateFromJson(jsonString.read())
    hg00101BioSample.setIndividualId(hg00101Individual.getId())
    self.repo.insertBioSample(hg00101BioSample)
    # --- Read group sets: convert each SAM to an indexed BAM, then
    # link each read group to its biosample by sample name. ---
    readFiles = [
        "brca1_HG00096.sam",
        "brca1_HG00099.sam",
        "brca1_HG00101.sam"]
    for readFile in readFiles:
        # e.g. "brca1_HG00096.sam" -> "HG00096"
        name = readFile.split('_')[1].split('.')[0]
        readSrc = pysam.AlignmentFile(
            os.path.join(self.inputDirectory, readFile), "r")
        readDest = pysam.AlignmentFile(
            os.path.join(self.outputDirectory, name + ".bam"),
            "wb", header=readSrc.header)
        destFilePath = readDest.filename
        for readData in readSrc:
            readDest.write(readData)
        readDest.close()
        readSrc.close()
        pysam.index(destFilePath)
        readGroupSet = reads.HtslibReadGroupSet(dataset, name)
        readGroupSet.populateFromFile(destFilePath, destFilePath + ".bai")
        readGroupSet.setReferenceSet(referenceSet)
        dataset.addReadGroupSet(readGroupSet)
        bioSamples = [
            hg00096BioSample, hg00099BioSample, hg00101BioSample]
        for readGroup in readGroupSet.getReadGroups():
            for bioSample in bioSamples:
                if bioSample.getLocalId() == readGroup.getSampleName():
                    readGroup.setBioSampleId(bioSample.getId())
        self.repo.insertReadGroupSet(readGroupSet)
    # --- Sequence ontology, needed by variant and feature sets. ---
    ontologyMapFileName = "so-xp-simple.obo"
    inputOntologyMap = os.path.join(
        self.inputDirectory, ontologyMapFileName)
    outputOntologyMap = os.path.join(
        self.outputDirectory, ontologyMapFileName)
    shutil.copy(inputOntologyMap, outputOntologyMap)
    sequenceOntology = ontologies.Ontology("so-xp-simple")
    sequenceOntology.populateFromFile(outputOntologyMap)
    sequenceOntology._id = "so-xp-simple"
    self.repo.insertOntology(sequenceOntology)
    self.repo.addOntology(sequenceOntology)
    # --- Variant sets (one per VCF fixture). ---
    vcfFiles = [
        "brca1_1kgPhase3_variants.vcf",
        "brca1_WASH7P_annotation.vcf",
        "brca1_OR4F_annotation.vcf"]
    for vcfFile in vcfFiles:
        self.addVariantSet(
            vcfFile, dataset, referenceSet, sequenceOntology, bioSamples)
    # Sequence annotations
    seqAnnFile = "brca1_gencodev19.gff3"
    seqAnnSrc = os.path.join(self.inputDirectory, seqAnnFile)
    seqAnnDest = os.path.join(self.outputDirectory, "gencodev19.db")
    dbgen = generate_gff3_db.Gff32Db(seqAnnSrc, seqAnnDest)
    dbgen.run()
    gencode = sequence_annotations.Gff3DbFeatureSet(dataset, "gencodev19")
    gencode.setOntology(sequenceOntology)
    gencode.populateFromFile(seqAnnDest)
    gencode.setReferenceSet(referenceSet)
    self.repo.insertFeatureSet(gencode)
    # add g2p featureSet
    g2pPath = os.path.join(self.inputDirectory, "cgd")
    # copy all files input directory to output path
    outputG2PPath = os.path.join(self.outputDirectory, "cgd")
    os.makedirs(outputG2PPath)
    for filename in glob.glob(os.path.join(g2pPath, '*.*')):
        shutil.copy(filename, outputG2PPath)
    featuresetG2P = g2p_featureset.PhenotypeAssociationFeatureSet(
        dataset, outputG2PPath)
    featuresetG2P.setOntology(sequenceOntology)
    featuresetG2P.setReferenceSet(referenceSet)
    featuresetG2P.populateFromFile(outputG2PPath)
    self.repo.insertFeatureSet(featuresetG2P)
    # add g2p phenotypeAssociationSet
    phenotypeAssociationSet = g2p_associationset\
        .RdfPhenotypeAssociationSet(dataset, "cgd", outputG2PPath)
    self.repo.insertPhenotypeAssociationSet(phenotypeAssociationSet)
    self.repo.commit()
    dataset.addFeatureSet(gencode)
    # RNA Quantification
    rnaDbName = os.path.join(self.outputDirectory, "rnaseq.db")
    store = rnaseq2ga.RnaSqliteStore(rnaDbName)
    store.createTables()
    rnaseq2ga.rnaseq2ga(
        self.inputDirectory + "/rna_brca1.tsv", rnaDbName,
        "rna_brca1.tsv", "rsem", featureType="transcript",
        readGroupSetNames="HG00096", featureSetNames="gencodev19",
        dataset=dataset)
    rnaQuantificationSet = rna_quantification.SqliteRnaQuantificationSet(
        dataset, "rnaseq")
    rnaQuantificationSet.setReferenceSet(referenceSet)
    rnaQuantificationSet.populateFromFile(rnaDbName)
    self.repo.insertRnaQuantificationSet(rnaQuantificationSet)
    self.repo.commit()
def getDataset(self):
    """Returns a fresh dataset with the local id "dataset"."""
    freshDataset = datasets.Dataset("dataset")
    return freshDataset
def run(self):
    """Converts the raw BRCA1 compliance inputs into a populated
    SqlDataRepository: reference set, dataset, individuals/biosamples,
    read group sets, ontology, variant sets, and a GFF3 feature set."""
    if not os.path.exists(self.outputDirectory):
        os.makedirs(self.outputDirectory)
    self.repo.open("w")
    self.repo.initialise()
    # --- Reference set: copy, bgzip, and describe the BRCA1 FASTA. ---
    referenceFileName = "ref_brca1.fa"
    inputRef = os.path.join(
        self.inputDirectory, referenceFileName)
    outputRef = os.path.join(
        self.outputDirectory, referenceFileName)
    shutil.copy(inputRef, outputRef)
    fastaFilePath = os.path.join(
        self.outputDirectory, referenceFileName + '.gz')
    pysam.tabix_compress(
        outputRef, fastaFilePath)
    with open(
            os.path.join(
                self.inputDirectory,
                "ref_brca1.json")) as refMetadataFile:
        refMetadata = json.load(refMetadataFile)
    with open(
            os.path.join(
                self.inputDirectory,
                "referenceset_hg37.json")) as refMetadataFile:
        refSetMetadata = json.load(refMetadataFile)
    referenceSet = references.HtslibReferenceSet(
        refSetMetadata['assemblyId'])
    referenceSet.populateFromFile(fastaFilePath)
    referenceSet.setAssemblyId(refSetMetadata['assemblyId'])
    referenceSet.setDescription(refSetMetadata['description'])
    referenceSet.setNcbiTaxonId(refSetMetadata['ncbiTaxonId'])
    referenceSet.setIsDerived(refSetMetadata['isDerived'])
    referenceSet.setSourceUri(refSetMetadata['sourceUri'])
    referenceSet.setSourceAccessions(refSetMetadata['sourceAccessions'])
    for reference in referenceSet.getReferences():
        reference.setNcbiTaxonId(refMetadata['ncbiTaxonId'])
        reference.setSourceAccessions(
            refMetadata['sourceAccessions'])
    self.repo.insertReferenceSet(referenceSet)
    dataset = datasets.Dataset("brca1")
    self.repo.insertDataset(dataset)
    # --- Individuals and biosamples, loaded from JSON fixtures. ---
    hg00096Individual = biodata.Individual(dataset, "HG00096")
    with open(
            os.path.join(
                self.inputDirectory,
                "individual_HG00096.json")) as jsonString:
        hg00096Individual.populateFromJson(jsonString.read())
    self.repo.insertIndividual(hg00096Individual)
    hg00096BioSample = biodata.BioSample(dataset, "HG00096")
    with open(
            os.path.join(
                self.inputDirectory,
                "bioSample_HG00096.json")) as jsonString:
        hg00096BioSample.populateFromJson(jsonString.read())
    hg00096BioSample.setIndividualId(hg00096Individual.getId())
    self.repo.insertBioSample(hg00096BioSample)
    hg00099Individual = biodata.Individual(dataset, "HG00099")
    with open(
            os.path.join(
                self.inputDirectory,
                "individual_HG00099.json")) as jsonString:
        hg00099Individual.populateFromJson(jsonString.read())
    self.repo.insertIndividual(hg00099Individual)
    hg00099BioSample = biodata.BioSample(dataset, "HG00099")
    with open(
            os.path.join(
                self.inputDirectory,
                "bioSample_HG00099.json")) as jsonString:
        hg00099BioSample.populateFromJson(jsonString.read())
    hg00099BioSample.setIndividualId(hg00099Individual.getId())
    self.repo.insertBioSample(hg00099BioSample)
    hg00101Individual = biodata.Individual(dataset, "HG00101")
    with open(
            os.path.join(
                self.inputDirectory,
                "individual_HG00101.json")) as jsonString:
        hg00101Individual.populateFromJson(jsonString.read())
    self.repo.insertIndividual(hg00101Individual)
    hg00101BioSample = biodata.BioSample(dataset, "HG00101")
    with open(
            os.path.join(
                self.inputDirectory,
                "bioSample_HG00101.json")) as jsonString:
        hg00101BioSample.populateFromJson(jsonString.read())
    hg00101BioSample.setIndividualId(hg00101Individual.getId())
    self.repo.insertBioSample(hg00101BioSample)
    # --- Read group sets: convert each SAM to an indexed BAM, then
    # link each read group to its biosample by sample name. ---
    readFiles = [
        "brca1_HG00096.sam",
        "brca1_HG00099.sam",
        "brca1_HG00101.sam"]
    for readFile in readFiles:
        # e.g. "brca1_HG00096.sam" -> "HG00096"
        name = readFile.split('_')[1].split('.')[0]
        readSrc = pysam.AlignmentFile(
            os.path.join(self.inputDirectory, readFile), "r")
        readDest = pysam.AlignmentFile(
            os.path.join(
                self.outputDirectory,
                name + ".bam"),
            "wb", header=readSrc.header)
        destFilePath = readDest.filename
        for readData in readSrc:
            readDest.write(readData)
        readDest.close()
        readSrc.close()
        pysam.index(destFilePath)
        readGroupSet = reads.HtslibReadGroupSet(dataset, name)
        readGroupSet.populateFromFile(destFilePath, destFilePath + ".bai")
        readGroupSet.setReferenceSet(referenceSet)
        bioSamples = [hg00096BioSample, hg00099BioSample, hg00101BioSample]
        for readGroup in readGroupSet.getReadGroups():
            for bioSample in bioSamples:
                if bioSample.getLocalId() == readGroup.getSampleName():
                    readGroup.setBioSampleId(bioSample.getId())
        self.repo.insertReadGroupSet(readGroupSet)
    # --- Sequence ontology, needed by variant and feature sets. ---
    ontologyMapFileName = "so-xp-simple.obo"
    inputOntologyMap = os.path.join(
        self.inputDirectory, ontologyMapFileName)
    outputOntologyMap = os.path.join(
        self.outputDirectory, ontologyMapFileName)
    shutil.copy(inputOntologyMap, outputOntologyMap)
    sequenceOntology = ontologies.Ontology("so-xp-simple")
    sequenceOntology.populateFromFile(outputOntologyMap)
    sequenceOntology._id = "so-xp-simple"
    self.repo.insertOntology(sequenceOntology)
    self.repo.addOntology(sequenceOntology)
    # --- Variant sets (one per VCF fixture). ---
    vcfFiles = [
        "brca1_1kgPhase3_variants.vcf",
        "brca1_WASH7P_annotation.vcf",
        "brca1_OR4F_annotation.vcf"]
    for vcfFile in vcfFiles:
        self.addVariantSet(
            vcfFile, dataset, referenceSet, sequenceOntology, bioSamples)
    # --- GFF3-based feature set (gencode v19). ---
    seqAnnFile = "brca1_gencodev19.gff3"
    seqAnnSrc = os.path.join(self.inputDirectory, seqAnnFile)
    seqAnnDest = os.path.join(self.outputDirectory, "gencodev19.db")
    dbgen = generate_gff3_db.Gff32Db(seqAnnSrc, seqAnnDest)
    dbgen.run()
    gencode = sequenceAnnotations.Gff3DbFeatureSet(dataset, "gencodev19")
    gencode.setOntology(sequenceOntology)
    gencode.populateFromFile(seqAnnDest)
    gencode.setReferenceSet(referenceSet)
    self.repo.insertFeatureSet(gencode)
    self.repo.commit()
    print("Done converting compliance data.", file=sys.stderr)
def setUp(self):
    """Builds the fixture: one abstract sequence-annotation feature set
    inside a throwaway dataset."""
    name = "testFeatureSet"
    parent = datasets.Dataset("test_ds")
    self._featureSetName = name
    self._dataset = parent
    self._featureSet = sequence_annotations.AbstractFeatureSet(
        parent, name)
def __init__(self, localId, baseDir):
    """Builds the phenotype association set under test from baseDir."""
    # Parent dataset for the association set.
    self._dataset = datasets.Dataset("ds")
    super(PhenotypeAssociationSetTest, self).__init__(localId, baseDir)
    # NOTE(review): attribute name keeps the existing misspelling
    # ("Assocation") — other tests may reference it as written.
    self.phenotypeAssocationSet = self.getDataModelInstance(
        localId, baseDir)