Пример #1
0
 def addFeatureSet(self):
     """
     Adds a new feature set to this repo.

     A Gff3DbFeatureSet is created in the dataset named on the command
     line, using the name that getNameFromPath derives from the file path
     argument. The requested reference set and ontology are attached, the
     set is populated from the file, and it is inserted via _updateRepo.

     Raises RepoManagerException if no reference set name or no ontology
     name was supplied.
     """
     self._openRepo()
     args = self._args
     parentDataset = self._repo.getDatasetByName(args.datasetName)
     dataPath = self._getFilePath(args.filePath, args.relativePath)
     newFeatureSet = sequence_annotations.Gff3DbFeatureSet(
         parentDataset, getNameFromPath(args.filePath))

     # The reference set is mandatory for a feature set.
     if args.referenceSetName is None:
         raise exceptions.RepoManagerException(
             "A reference set name must be provided")
     newFeatureSet.setReferenceSet(
         self._repo.getReferenceSetByName(args.referenceSetName))

     # So is the ontology, which is checked before it is attached.
     if args.ontologyName is None:
         raise exceptions.RepoManagerException(
             "A sequence ontology name must be provided")
     sequenceOntology = self._repo.getOntologyByName(args.ontologyName)
     self._checkSequenceOntology(sequenceOntology)
     newFeatureSet.setOntology(sequenceOntology)

     newFeatureSet.populateFromFile(dataPath)
     parsedAttributes = json.loads(args.attributes)
     newFeatureSet.setAttributes(parsedAttributes)
     self._updateRepo(self._repo.insertFeatureSet, newFeatureSet)
Пример #2
0
 def insertVariantSet(self, variantSet):
     """
     Inserts the specified variantSet into this repository, followed by
     each of the callSets it contains.

     Raises RepoManagerException if creating the variant set row fails.
     """
     # We cheat a little here with the VariantSetMetadata, and encode these
     # within the table as a JSON dump. These should really be stored in
     # their own table
     metadataJson = json.dumps(
         [protocol.toJsonDict(metadata) for metadata in
          variantSet.getMetadata()])
     urlMapJson = json.dumps(variantSet.getReferenceToDataUrlIndexMap())
     try:
         # Take one timestamp so that a freshly inserted row carries
         # identical created and updated values.
         now = datetime.datetime.now()
         m.Variantset.create(
             id=variantSet.getId(),
             datasetid=variantSet.getParentContainer().getId(),
             referencesetid=variantSet.getReferenceSet().getId(),
             name=variantSet.getLocalId(),
             created=now,
             updated=now,
             metadata=metadataJson,
             dataurlindexmap=urlMapJson,
             attributes=json.dumps(variantSet.getAttributes()))
     except Exception as e:
         raise exceptions.RepoManagerException(e)
     for callSet in variantSet.getCallSets():
         self.insertCallSet(callSet)
Пример #3
0
 def setColNum(self, header, name, defaultNum=None):
     """
     Returns the position of the column called name within header.

     If header does not contain name, defaultNum is returned when one was
     given; otherwise a RepoManagerException is raised.
     """
     try:
         return header.index(name)
     except ValueError:
         # index() signals a missing item with ValueError. Anything else
         # (wrong header type, KeyboardInterrupt, ...) is not a missing
         # column and is allowed to propagate.
         if defaultNum is None:
             raise exceptions.RepoManagerException(
                 "Missing {} column in expression table.".format(name))
         return defaultNum
Пример #4
0
 def init(self):
     """
     Initialises this repository through _updateRepo.

     If the repository already exists it is deleted first when the force
     argument is set; without force a RepoManagerException is raised and
     nothing is changed.
     """
     if self._repo.exists():
         if not self._args.force:
             raise exceptions.RepoManagerException(
                 "Repository '{}' already exists. Use --force to "
                 "overwrite".format(self._registryPath))
         self._repo.delete()
     self._updateRepo(self._repo.initialise)
Пример #5
0
 def insertCallSet(self, callSet):
     """
     Stores the specified callSet as a new Callset row.

     Any exception raised while reading the callSet or creating the row is
     re-raised as a RepoManagerException.
     """
     try:
         # Column values are gathered inside the try so that getter
         # failures are wrapped in the same way as insert failures.
         fields = {
             "id": callSet.getId(),
             "name": callSet.getLocalId(),
             "variantsetid": callSet.getParentContainer().getId(),
             "biosampleid": callSet.getBiosampleId(),
             "attributes": json.dumps(callSet.getAttributes()),
         }
         m.Callset.create(**fields)
     except Exception as error:
         raise exceptions.RepoManagerException(error)
Пример #6
0
 def insertFeatureSet(self, featureSet):
     """
     Stores the specified featureSet as a new Featureset row.

     Any exception raised while reading the featureSet or creating the row
     is re-raised as a RepoManagerException.
     """
     # TODO add support for info and sourceUri fields.
     try:
         fields = {
             "id": featureSet.getId(),
             "datasetid": featureSet.getParentContainer().getId(),
             "referencesetid": featureSet.getReferenceSet().getId(),
             "ontologyid": featureSet.getOntology().getId(),
             "name": featureSet.getLocalId(),
             "dataurl": featureSet.getDataUrl(),
             "attributes": json.dumps(featureSet.getAttributes()),
         }
         m.Featureset.create(**fields)
     except Exception as error:
         raise exceptions.RepoManagerException(error)
Пример #7
0
 def insertVariantAnnotationSet(self, variantAnnotationSet):
     """
     Stores the specified variantAnnotationSet as a new
     Variantannotationset row.

     The analysis object is converted with protocol.toJsonDict and dumped
     to JSON before the insert. Any exception raised while building or
     creating the row is re-raised as a RepoManagerException.
     """
     analysisDict = protocol.toJsonDict(variantAnnotationSet.getAnalysis())
     analysisJson = json.dumps(analysisDict)
     try:
         fields = {
             "id": variantAnnotationSet.getId(),
             "variantsetid":
                 variantAnnotationSet.getParentContainer().getId(),
             "ontologyid": variantAnnotationSet.getOntology().getId(),
             "name": variantAnnotationSet.getLocalId(),
             "analysis": analysisJson,
             "annotationtype": variantAnnotationSet.getAnnotationType(),
             "created": variantAnnotationSet.getCreationTime(),
             "updated": variantAnnotationSet.getUpdatedTime(),
             "attributes": json.dumps(
                 variantAnnotationSet.getAttributes()),
         }
         m.Variantannotationset.create(**fields)
     except Exception as error:
         raise exceptions.RepoManagerException(error)
Пример #8
0
 def removeReferenceSet(self, referenceSet):
     """
     Removes the specified referenceSet from this repository.

     The Reference rows belonging to the referenceSet are deleted first,
     then the Referenceset row itself. ReadGroupSets and other items that
     refer to this ReferenceSet are not removed here; they must be deleted
     before the referenceSet can be removed.

     Any exception raised by either delete is reported as a
     RepoManagerException carrying a fixed explanatory message.
     """
     try:
         # Children first, then the parent row.
         m.Reference.delete().where(
             m.Reference.referencesetid == referenceSet.getId()).execute()
         m.Referenceset.delete().where(
             m.Referenceset.id == referenceSet.getId()).execute()
     except Exception:
         raise exceptions.RepoManagerException(
             "Unable to delete reference set.  "
             "There are objects currently in the registry which are "
             "aligned against it.  Remove these objects before removing "
             "the reference set.")
Пример #9
0
 def insertReadGroup(self, readGroup):
     """
     Stores the specified readGroup as a new Readgroup row.

     The stats and experiment objects are converted with
     protocol.toJsonDict and dumped to JSON before the insert. Any
     exception raised while building or creating the row is re-raised as a
     RepoManagerException.
     """
     statsDict = protocol.toJsonDict(readGroup.getStats())
     statsJson = json.dumps(statsDict)
     experimentDict = protocol.toJsonDict(readGroup.getExperiment())
     experimentJson = json.dumps(experimentDict)
     try:
         fields = {
             "id": readGroup.getId(),
             "readgroupsetid": readGroup.getParentContainer().getId(),
             "name": readGroup.getLocalId(),
             "predictedinsertedsize": readGroup.getPredictedInsertSize(),
             "samplename": readGroup.getSampleName(),
             "description": readGroup.getDescription(),
             "stats": statsJson,
             "experiment": experimentJson,
             "biosampleid": readGroup.getBiosampleId(),
             "attributes": json.dumps(readGroup.getAttributes()),
         }
         m.Readgroup.create(**fields)
     except Exception as error:
         raise exceptions.RepoManagerException(error)
Пример #10
0
 def addRnaQuantificationSet(self):
     """
     Adds an rnaQuantificationSet to this repo.

     The set takes the name argument when one is given, and otherwise the
     name that getNameFromPath derives from the file path argument. It is
     attached to the requested reference set, populated from the file and
     inserted via _updateRepo.

     Raises RepoManagerException if no reference set name was supplied.
     """
     self._openRepo()
     args = self._args
     parentDataset = self._repo.getDatasetByName(args.datasetName)
     setName = args.name
     if setName is None:
         setName = getNameFromPath(args.filePath)
     quantificationSet = rna_quantification.SqliteRnaQuantificationSet(
         parentDataset, setName)
     if args.referenceSetName is None:
         raise exceptions.RepoManagerException(
             "A reference set name must be provided")
     quantificationSet.setReferenceSet(
         self._repo.getReferenceSetByName(args.referenceSetName))
     quantificationSet.populateFromFile(args.filePath)
     parsedAttributes = json.loads(args.attributes)
     quantificationSet.setAttributes(parsedAttributes)
     self._updateRepo(
         self._repo.insertRnaQuantificationSet, quantificationSet)
Пример #11
0
 def insertReadGroupSet(self, readGroupSet):
     """
     Stores the specified readGroupSet as a new Readgroupset row and then
     inserts each of its read groups.

     Programs and stats are converted with protocol.toJsonDict and dumped
     to JSON before the insert. Any exception raised while creating the
     row or inserting a read group is re-raised as a RepoManagerException.
     """
     programDicts = [
         protocol.toJsonDict(program)
         for program in readGroupSet.getPrograms()]
     programsJson = json.dumps(programDicts)
     statsJson = json.dumps(protocol.toJsonDict(readGroupSet.getStats()))
     try:
         fields = {
             "id": readGroupSet.getId(),
             "datasetid": readGroupSet.getParentContainer().getId(),
             "referencesetid": readGroupSet.getReferenceSet().getId(),
             "name": readGroupSet.getLocalId(),
             "programs": programsJson,
             "stats": statsJson,
             "dataurl": readGroupSet.getDataUrl(),
             "indexfile": readGroupSet.getIndexFile(),
             "attributes": json.dumps(readGroupSet.getAttributes()),
         }
         m.Readgroupset.create(**fields)
         # The member read groups are inserted under the same handler.
         for member in readGroupSet.getReadGroups():
             self.insertReadGroup(member)
     except Exception as error:
         raise exceptions.RepoManagerException(error)
Пример #12
0
 def _openRepo(self):
     """
     Opens the repository with datarepo.MODE_READ.

     Raises RepoManagerException if the repository does not exist.
     """
     if self._repo.exists():
         self._repo.open(datarepo.MODE_READ)
     else:
         raise exceptions.RepoManagerException(
             "Repo '{}' does not exist. Please create a new repo "
             "using the 'init' command.".format(self._registryPath))
Пример #13
0
    def addVariantSet(self):
        """
        Adds a new VariantSet into this repo.

        Data files come from the dataFiles argument. A single directory
        argument is expanded to the "*.vcf.gz" files it contains. When no
        index files are given, each data file must exist locally and its
        index is taken to be the data file path with ".tbi" appended. The
        reference set is taken from the referenceSetName argument, falling
        back to the name found in the VCF header. If the variant set is
        annotated and addAnnotationSets is set, its annotation sets are
        inserted together with the variant set in one _updateRepo call.

        Raises RepoManagerException if a directory holds no VCF files, if
        several files are given without a name, if no reference set can be
        determined, or if annotations are requested without an ontology
        name. Raises MissingIndexException if a data file is not found
        locally while the default index location is in use.
        """
        self._openRepo()
        dataset = self._repo.getDatasetByName(self._args.datasetName)
        dataUrls = self._args.dataFiles
        name = self._args.name
        if len(dataUrls) == 1:
            if self._args.name is None:
                name = getNameFromPath(dataUrls[0])
            if os.path.isdir(dataUrls[0]):
                # Read in the VCF files from the directory.
                # TODO support uncompressed VCF and BCF files
                vcfDir = dataUrls[0]
                pattern = os.path.join(vcfDir, "*.vcf.gz")
                dataUrls = glob.glob(pattern)
                if not dataUrls:
                    raise exceptions.RepoManagerException(
                        "Cannot find any VCF files in the directory "
                        "'{}'.".format(vcfDir))
                dataUrls[0] = self._getFilePath(dataUrls[0],
                                                self._args.relativePath)
        elif self._args.name is None:
            raise exceptions.RepoManagerException(
                "Cannot infer the intended name of the VariantSet when "
                "more than one VCF file is provided. Please provide a "
                "name argument using --name.")
        parsed = urlparse.urlparse(dataUrls[0])
        if parsed.scheme not in ['http', 'ftp']:
            # Build a real list: dataUrls is walked several times below
            # (existence check, index names, populateFromFile), which a
            # lazy map object would not survive.
            dataUrls = [
                self._getFilePath(url, self._args.relativePath)
                for url in dataUrls]
        # Now, get the index files for the data files that we've now obtained.
        indexFiles = self._args.indexFiles
        if indexFiles is None:
            # First check if all the paths exist locally, as they must
            # if we are making a default index path.
            for dataUrl in dataUrls:
                if not os.path.exists(dataUrl):
                    raise exceptions.MissingIndexException(
                        "Cannot find file '{}'. All variant files must be "
                        "stored locally if the default index location is "
                        "used. If you are trying to create a VariantSet "
                        "based on remote URLs, please download the index "
                        "files to the local file system and provide them "
                        "with the --indexFiles argument".format(dataUrl))
            # We assume that the indexes are made by adding .tbi
            indexSuffix = ".tbi"
            # TODO support BCF input properly here by adding .csi
            indexFiles = [filename + indexSuffix for filename in dataUrls]
        indexFiles = [
            self._getFilePath(url, self._args.relativePath)
            for url in indexFiles]
        variantSet = variants.HtslibVariantSet(dataset, name)
        variantSet.populateFromFile(dataUrls, indexFiles)
        # Get the reference set that is associated with the variant set.
        referenceSetName = self._args.referenceSetName
        if referenceSetName is None:
            # Try to find a reference set name from the VCF header.
            referenceSetName = variantSet.getVcfHeaderReferenceSetName()
        if referenceSetName is None:
            raise exceptions.RepoManagerException(
                "Cannot infer the ReferenceSet from the VCF header. Please "
                "specify the ReferenceSet to associate with this "
                "VariantSet using the --referenceSetName option")
        referenceSet = self._repo.getReferenceSetByName(referenceSetName)
        variantSet.setReferenceSet(referenceSet)
        variantSet.setAttributes(json.loads(self._args.attributes))
        # Now check for annotations
        annotationSets = []
        if variantSet.isAnnotated() and self._args.addAnnotationSets:
            ontologyName = self._args.ontologyName
            if ontologyName is None:
                raise exceptions.RepoManagerException(
                    "A sequence ontology name must be provided")
            ontology = self._repo.getOntologyByName(ontologyName)
            self._checkSequenceOntology(ontology)
            for annotationSet in variantSet.getVariantAnnotationSets():
                annotationSet.setOntology(ontology)
                annotationSets.append(annotationSet)

        # Add the annotation sets and the variant set as an atomic update
        def updateRepo():
            self._repo.insertVariantSet(variantSet)
            for annotationSet in annotationSets:
                self._repo.insertVariantAnnotationSet(annotationSet)
        self._updateRepo(updateRepo)
Пример #14
0
 def _checkSequenceOntology(self, ontology):
     """
     Verifies that the ontology's prefix equals
     ontologies.SEQUENCE_ONTOLOGY_PREFIX.

     Raises RepoManagerException naming the ontology when it does not.
     """
     expectedPrefix = ontologies.SEQUENCE_ONTOLOGY_PREFIX
     if ontology.getOntologyPrefix() == expectedPrefix:
         return
     message = "Ontology '{}' does not have ontology prefix '{}'".format(
         ontology.getName(), expectedPrefix)
     raise exceptions.RepoManagerException(message)