def _getSimulatedVariantSet(self):
    """
    Returns a new SimulatedVariantSet called 'variantSet1', attached to
    a fresh Dataset ('dataset1') and a fresh SimulatedReferenceSet
    ('srs1'), and configured from this object's randomSeed, numCalls
    and variantDensity attributes.
    """
    return variants.SimulatedVariantSet(
        datasets.Dataset('dataset1'),
        references.SimulatedReferenceSet("srs1"),
        'variantSet1',
        randomSeed=self.randomSeed,
        numCalls=self.numCalls,
        variantDensity=self.variantDensity)
def testCreation(self):
    """
    Builds a SimulatedVariantAnnotationSet on top of a simulated variant
    set and checks that:

    - the annotation set's protocol element carries the id of the
      variant set it was built from;
    - every annotation's ``created`` value survives a strptime/strftime
      round trip with the expected timestamp format;
    - every annotation refers to the id of the variant it is paired
      with.
    """
    # Format the ``created`` field is expected to follow.
    timestampFormat = "%Y-%m-%dT%H:%M:%S.%fZ"

    dataset = datasets.Dataset('dataset1')
    referenceSet = references.SimulatedReferenceSet("srs1")
    localId = "variantAnnotationSetId"
    simulatedVariantSet = variants.SimulatedVariantSet(
        dataset, referenceSet, 'variantSet1',
        randomSeed=self.randomSeed,
        numCalls=self.numCalls,
        variantDensity=self.variantDensity)
    simulatedVariantAnnotationSet = variants.SimulatedVariantAnnotationSet(
        simulatedVariantSet, localId, self.randomSeed)
    annotations = simulatedVariantAnnotationSet.getVariantAnnotations(
        self.referenceName, self.startPosition, self.endPosition)

    # assertEqual is used throughout: the assertEquals alias is
    # deprecated and no longer exists on recent unittest versions.
    self.assertEqual(
        simulatedVariantSet.toProtocolElement().id,
        simulatedVariantAnnotationSet.toProtocolElement().variant_set_id,
        "Variant Set ID should match the annotation's variant set ID")
    for variant, ann in annotations:
        # Parsing and re-rendering must reproduce the original string
        # exactly, otherwise the value is not in the expected format.
        parsedCreated = datetime.datetime.strptime(
            ann.created, timestampFormat)
        self.assertEqual(
            parsedCreated.strftime(timestampFormat),
            ann.created,
            "Expect time format to be in ISO8601")
        self.assertEqual(variant.id, ann.variant_id)
def __init__(
        self, localId, referenceSet, randomSeed=0,
        numVariantSets=1, numCalls=1, variantDensity=0.5,
        numReadGroupSets=1, numReadGroupsPerReadGroupSet=1,
        numAlignments=1, numFeatureSets=1,
        numPhenotypeAssociationSets=1,
        numPhenotypeAssociations=2,
        numRnaQuantSets=2, numExpressionLevels=2):
    """
    Creates a dataset and fills it with simulated content.

    The containers are added in this order: phenotype association
    sets, variant sets (each with one variant annotation set), read
    group sets, feature sets and RNA quantification sets. Within each
    seeded family, the i-th container is seeded with ``randomSeed + i``.

    :param localId: local ID of the dataset; also used in its
        description.
    :param referenceSet: reference set handed to the variant sets and
        read group sets, and set on the feature sets and RNA
        quantification sets.
    :param randomSeed: base seed for the simulated containers.
    :param numVariantSets: number of variant sets to create.
    :param numCalls: passed through to each SimulatedVariantSet.
    :param variantDensity: passed through to each SimulatedVariantSet.
    :param numReadGroupSets: number of read group sets to create.
    :param numReadGroupsPerReadGroupSet: passed through to each
        SimulatedReadGroupSet.
    :param numAlignments: passed through to each
        SimulatedReadGroupSet.
    :param numFeatureSets: number of feature sets to create.
    :param numPhenotypeAssociationSets: number of phenotype
        association sets to create.
    :param numPhenotypeAssociations: passed through to each
        SimulatedPhenotypeAssociationSet.
    :param numRnaQuantSets: number of RNA quantification sets to
        create.
    :param numExpressionLevels: accepted for callers, but not
        referenced anywhere in this constructor.
    """
    super(SimulatedDataset, self).__init__(localId)
    self._description = "Simulated dataset {}".format(localId)

    # Phenotype association sets
    for pasIndex in range(numPhenotypeAssociationSets):
        simAssociationSet = g2p.SimulatedPhenotypeAssociationSet(
            self, "simPas{}".format(pasIndex),
            randomSeed + pasIndex, numPhenotypeAssociations)
        self.addPhenotypeAssociationSet(simAssociationSet)

    # TODO create a simulated Ontology

    # Variant sets, plus the bio samples and individuals of their
    # call sets
    for vsIndex in range(numVariantSets):
        # The same seed drives the variant set and its annotation set.
        vsSeed = randomSeed + vsIndex
        simVariantSet = variants.SimulatedVariantSet(
            self, referenceSet, "simVs{}".format(vsIndex), vsSeed,
            numCalls, variantDensity)
        for callSet in simVariantSet.getCallSets():
            # Each call set gets two bio samples (the second one has
            # "2" appended to its local ID) sharing one individual.
            firstSample = biodata.BioSample(self, callSet.getLocalId())
            secondSample = biodata.BioSample(
                self, callSet.getLocalId() + "2")
            donor = biodata.Individual(self, callSet.getLocalId())
            firstSample.setIndividualId(donor.getId())
            secondSample.setIndividualId(donor.getId())
            self.addIndividual(donor)
            self.addBioSample(firstSample)
            self.addBioSample(secondSample)
        self.addVariantSet(simVariantSet)
        simAnnotationSet = variants.SimulatedVariantAnnotationSet(
            simVariantSet, "simVas{}".format(vsIndex), vsSeed)
        simVariantSet.addVariantAnnotationSet(simAnnotationSet)

    # Read group sets, plus one bio sample and individual per read
    # group
    for rgsIndex in range(numReadGroupSets):
        simReadGroupSet = reads.SimulatedReadGroupSet(
            self, 'simRgs{}'.format(rgsIndex), referenceSet,
            randomSeed + rgsIndex, numReadGroupsPerReadGroupSet,
            numAlignments)
        for readGroup in simReadGroupSet.getReadGroups():
            sample = biodata.BioSample(self, readGroup.getLocalId())
            donor = biodata.Individual(self, readGroup.getLocalId())
            sample.setIndividualId(donor.getId())
            readGroup.setBioSampleId(sample.getId())
            self.addIndividual(donor)
            self.addBioSample(sample)
        self.addReadGroupSet(simReadGroupSet)

    # Feature sets
    for fsIndex in range(numFeatureSets):
        simFeatureSet = sequence_annotations.SimulatedFeatureSet(
            self, "simFs{}".format(fsIndex), randomSeed + fsIndex)
        simFeatureSet.setReferenceSet(referenceSet)
        self.addFeatureSet(simFeatureSet)

    # RNA quantification sets (constructed without a seed)
    for rqsIndex in range(numRnaQuantSets):
        simRnaQuantSet = rnaQuantification.SimulatedRnaQuantificationSet(
            self, 'simRqs{}'.format(rqsIndex))
        simRnaQuantSet.setReferenceSet(referenceSet)
        self.addRnaQuantificationSet(simRnaQuantSet)