Exemplo n.º 1
0
    def add_drugability_props_to_genome(self,
                                        name,
                                        description,
                                        target,
                                        _type,
                                        options=None,
                                        user="******"):
        """
        Register a druggability parameter on this object unless a parameter
        called ``name`` is already present.

        The new parameter is only appended to ``self.druggabilityParams``;
        nothing is saved here.

        name: parameter name, also used for the duplicate check
        description: stored as the parameter's description
        target: stored as the parameter's target
        _type: SeqColDruggabilityParamTypes, stored as the parameter's type
        options: optional; assigned to the parameter only when truthy
        user: stored as the parameter's uploader
        """
        # Already registered: leave the existing definition untouched.
        if self.has_druggability_param(name):
            return

        param = SeqColDruggabilityParam(
            name=name,
            description=description,
            target=target,
            type=_type,
            uploader=user,
        )
        if options:
            param.options = options
        self.druggabilityParams.append(param)
Exemplo n.º 2
0
    def _add_drugability_props_to_genome(self, genome):
        """
        Add the BioCyc search parameters to ``genome``.

        Walks ``BioCyc.protein_pathway_search_params`` followed by
        ``BioCyc.pathways_search_params`` and, for every entry whose name is
        not yet registered on ``genome``, appends a new
        SeqColDruggabilityParam uploaded by ``self.user``.
        ``genome`` is modified in memory only; it is not saved here.
        """
        param_specs = (BioCyc.protein_pathway_search_params
                       + BioCyc.pathways_search_params)

        for spec in param_specs:
            # Each entry is an 8-tuple; the last three items are the default
            # group operation, default operation and default value.
            name, description, target, _type, options, dgo, do, dv = spec

            if genome.has_druggability_param(name):
                continue

            param = SeqColDruggabilityParam(name=name,
                                            description=description,
                                            target=target,
                                            type=_type,
                                            uploader=self.user)
            if options:
                param.options = options

            param.defaultGroupOperation = dgo
            param.defaultOperation = do
            param.defaultValue = dv

            genome.druggabilityParams.append(param)
Exemplo n.º 3
0
    def update_genome_props(self):
        """
        Rebuild the "variant-db" resistance search parameters of
        ``self.organism``.

        Steps, in order:
          1. pull this user's "variant-db" druggability parameters from the
             organism's SeqCollection;
          2. for "resistance" and for every drug in ``Saureus.drugs``, set the
             matching ``search.<prefix><name>`` flag to False on all proteins
             of the organism;
          3. re-create every parameter that the collection does not have and
             save the collection.
        """
        # Search keys carry a "<user>." prefix, except for the "demo" user.
        key_prefix = "" if self.user == "demo" else self.user + "."

        # Tuple layout: name, description, target, type, options,
        # default value, default operation, default group operation.
        search_params = [
            ("resistance", "Associated with resistance", "variant-db",
             SeqColDruggabilityParamTypes.value, ["true", "false"],
             "true", "equal", "avg"),
        ]
        for drug in Saureus.drugs:
            search_params.append(
                (drug.lower(), "Associated with " + drug + " resistance",
                 "variant-db", SeqColDruggabilityParamTypes.value,
                 ["true", "false"], "true", "equal", "avg"))

        stale_params = {"target": "variant-db", "uploader": self.user}
        SeqCollection.objects(name=self.organism).update(
            __raw__={"$pull": {"druggabilityParams": stale_params}})

        collection = SeqCollection.objects(name=self.organism).get()
        for param_spec in search_params:
            (name, description, target, _type, options, default_value,
             default_operation, default_group_operation) = param_spec

            # Reset the flag on every protein of the organism.
            search_key = "search." + key_prefix + name
            Protein.objects(organism=self.organism).update(
                __raw__={"$set": {search_key: False}})

            if collection.has_druggability_param(name):
                continue

            param = SeqColDruggabilityParam(name=name,
                                            description=description,
                                            target=target,
                                            type=_type,
                                            uploader=self.user)
            param.options = options
            param.defaultValue = default_value
            param.defaultOperation = default_operation
            param.defaultGroupOperation = default_group_operation
            collection.druggabilityParams.append(param)

        collection.save()
Exemplo n.º 4
0
def annotate_variants(organism_name, strain_name, database, parse_change):
    """
    Flag the proteins of an organism whose strain variants are also reported
    in a reference variant database.

    Two search properties are reset to False on every protein of the organism
    and registered (if missing) as druggability parameters of its collection:
      * ``<strain>_<database>``: a strain variant matches a database variant,
        either by identical ref/alt residues or because both are frameshifts;
      * ``<strain>_<database>_pos``: a strain variant starts at the same
        position as a database variant.

    Matching strain variants get the database feature id stored in their
    ``ref_pos`` qualifier (same position) and ``ref`` qualifier (same change).

    organism_name: name of the SeqCollection / organism of the proteins
    strain_name: value of the ``strain`` qualifier of the strain variants
    database: feature type of the database variants; also part of the
        property names
    parse_change: function that transforms dbvar.qualifiers["change"] into
        aa_ref, aa_alt
    """

    collection = SeqCollection.objects(name=organism_name).get()

    match_prop = strain_name + "_" + database
    pos_prop = match_prop + "_pos"
    prop_descriptions = (
        (match_prop, "Variant in strain " + strain_name +
         " is reported in " + database),
        (pos_prop, "The position of the variant the strain " + strain_name +
         " is reported in " + database),
    )

    for prop, description in prop_descriptions:
        # Start from a clean slate: every protein is unflagged.
        Protein.objects(organism=organism_name).update(
            __raw__={"$set": {
                "search." + prop: False
            }})
        if not collection.has_druggability_param(prop):
            dp = SeqColDruggabilityParam(
                name=prop,
                description=description,
                target="variant-strain",
                type=SeqColDruggabilityParamTypes.value,
                uploader="demo")
            dp.options = ["true", "false"]
            dp.defaultValue = "true"
            dp.defaultOperation = "equal"
            dp.defaultGroupOperation = "avg"
            collection.druggabilityParams.append(dp)
    collection.save()

    for p in Protein.objects(__raw__={
            "organism": organism_name,
            "features.qualifiers.strain": strain_name
    }).no_cache():
        dbvars = [f for f in p.features if f.type == database]
        if not dbvars:
            continue

        strainvars = [
            f for f in p.features if (f.type == "strain_variant") and (
                f._data["qualifiers"]["strain"] == strain_name)
        ]

        # Reset once per strain variant, BEFORE comparing against the database
        # variants. Resetting inside the dbvar loop would erase a match found
        # for an earlier database variant as soon as a later one did not match.
        for strainvar in strainvars:
            strainvar._data["qualifiers"]["ref_pos"] = False

        for dbvar in dbvars:
            for strainvar in strainvars:
                if dbvar.location.start != strainvar.location.start:
                    continue

                p.search[pos_prop] = True
                strainvar._data["qualifiers"]["ref_pos"] = dbvar._id

                # parse_change is caller supplied and the qualifier text is
                # free-form, so a failed comparison is logged and skipped.
                try:
                    dref, dalt = parse_change(
                        dbvar._data["qualifiers"]["change"])
                    sref, salt = strainvar._data["qualifiers"][
                        "change"].strip().split("/")
                    sref = sref.strip()
                    salt = salt.strip()
                    if (dref == sref) and (dalt == salt):
                        p.search[match_prop] = True
                        strainvar._data["qualifiers"]["ref"] = dbvar._id
                except Exception as ex:
                    _log.warning(ex)

                # Frameshifts cannot be compared residue by residue: two
                # frameshifts at the same position count as a match.
                # NOTE: unlike the comparison above, this lookup is not
                # guarded and raises KeyError if "change" is missing.
                if (("frameshift"
                     in dbvar._data["qualifiers"]["change"].lower()) and
                    ("frameshift"
                     in strainvar._data["qualifiers"]["change"].lower())):
                    p.search[match_prop] = True

        # Saved whenever the protein has database variants.
        p.save()
Exemplo n.º 5
0
    def load_metadata(self, organism_name, datafile, uploader=demo):
        """
        Load per-gene properties from a tabular file into the proteins of an
        organism and register them as druggability parameters.

        The file is read with ``pandas.read_table`` (lines starting with "#"
        are comments). The column ``BioMongoDB.GENE_FIELD_IMPORT`` identifies
        the gene; every other column is a property. Previous values of those
        properties uploaded by ``uploader`` are removed first. Each value is
        then pushed to ``properties`` and set under ``search.<property>`` on
        all proteins whose alias matches the gene. Finally the collection's
        druggability parameters are created or updated (type "number" for
        float64/int64 columns, "value" with the distinct values as options
        otherwise) and the upload record is appended to the collection.

        organism_name: name of the SeqCollection / organism of the proteins
        datafile: path or buffer accepted by ``pandas.read_table``
        uploader: identifier stored as ``_type`` of the properties and as
            uploader of the parameters (the default ``demo`` is a name
            resolved outside this method)

        Raises AssertionError when the gene column is missing and IndexError
        when no collection is named ``organism_name``. Rows with an empty or
        unknown gene are skipped and reported in the upload's error list.
        """
        import pandas as pd
        from tqdm import tqdm

        seqCollection = list(SeqCollection.objects(name=organism_name))
        seqCollection = seqCollection[0]
        errors = []

        upload = DataUpload(uploader=uploader, errors=errors)

        df = pd.read_table(datafile, comment="#", index_col=False)

        # Validate the input BEFORE any destructive database update below.
        assert BioMongoDB.GENE_FIELD_IMPORT in df.columns, (
            "column " + str(BioMongoDB.GENE_FIELD_IMPORT) + " not found")

        headerProperties = [
            c for c in df.columns if c != BioMongoDB.GENE_FIELD_IMPORT
        ]

        # Drop whatever this uploader loaded before for the same properties.
        prots = Protein.objects(organism=organism_name)
        for hp in headerProperties:
            prots.update(
                __raw__={
                    "$pull": {
                        "properties": {
                            "property": hp,
                            "_type": uploader
                        }
                    },
                    "$unset": {
                        "search." + hp: ""
                    }
                })

        upload.properties = headerProperties

        # float64/int64 columns are numeric parameters; anything else is
        # handled as a categorical value.
        numericFields = []
        for k, v in dict(df.dtypes).items():
            if v not in [np.float64, np.int64]:
                df[k] = df[k].astype('category')
            else:
                numericFields.append(k)

        for linenum, fields in tqdm(df.iterrows(), total=len(df)):

            gene = fields[BioMongoDB.GENE_FIELD_IMPORT]

            # A missing cell is read as NaN, which is truthy: it must be
            # tested explicitly or it slips through the emptiness check.
            if pd.isnull(gene) or not gene:
                text = str(linenum) + " gene field is empty"
                errors.append(text)
                continue

            prots = Protein.objects(organism=organism_name, alias=gene)

            if not prots.count():
                # str(): the gene id is not a string when the column is
                # numeric.
                text = str(
                    linenum
                ) + " " + str(gene) + " does not exists in " + organism_name
                print(text)
                errors.append(text)
                continue

            for propertyName in headerProperties:
                prop = {
                    "_type": uploader,
                    "value": fields[propertyName],
                    "property": propertyName
                }
                prots.update(
                    __raw__={
                        "$push": {
                            "properties": prop
                        },
                        "$set": {
                            "search." + propertyName: fields[propertyName]
                        }
                    })

        for p in headerProperties:
            isNumeric = p in numericFields
            dpType = "number" if isNumeric else "value"
            options = [] if isNumeric else list(set(df[p]))

            currentDp = seqCollection.druggabilityParam(p, uploader)

            if currentDp:
                currentDp = currentDp[0]
                currentDp.options = options
                currentDp.type = dpType
            else:
                dp = SeqColDruggabilityParam(type=dpType,
                                             name=p,
                                             options=options,
                                             uploader=uploader,
                                             target="protein")
                seqCollection.druggabilityParams.append(dp)

        # NOTE(review): re-assigned in case DataUpload copied the list when it
        # was constructed, which would lose the errors collected afterwards;
        # harmless if it kept the same list - confirm against the model.
        upload.errors = errors
        seqCollection.uploads.append(upload)
        seqCollection.save()
Exemplo n.º 6
0
    def load_in_sndg(self, organism="H37Rv"):
        """
        Load the variants held in ``self._df`` as "tbdream" features of the
        proteins of ``organism``.

        Existing "tbdream" features are pulled from every protein of the
        organism. The "resistance" search flag and one flag per drug in
        ``TBDream.drugs`` are reset to False and registered (if missing) as
        druggability parameters of the organism's collection. Then, for each
        ``rv`` group of the dataframe that matches a protein by gene name
        (case-insensitive), one feature per row is appended and the protein's
        ``resistance`` flag and the flag of the row's drug are set to True.

        Rows whose position cannot be determined (no codon and a
        non-numeric AminoAcid) are logged and skipped.

        organism: name of the SeqCollection / organism to annotate
        """
        from SNDG.BioMongo.Model.Protein import Protein
        from SNDG.BioMongo.Model.Feature import Feature, Location
        from SNDG.BioMongo.Model.SeqCollection import SeqCollection
        from SNDG.BioMongo.Model.SeqColDruggabilityParam import SeqColDruggabilityParamTypes, SeqColDruggabilityParam

        from bson.objectid import ObjectId

        # Tuple layout: name, description, target, type, options,
        # default value, default operation, default group operation.
        search_params = [("resistance", "Associated with resistance", "variant-db",
                          SeqColDruggabilityParamTypes.value, ["true", "false"], "true", "equal", "avg")
                         ]
        search_params = search_params + [
            (x, "Associated with " + x + " resistance", "variant-db",
             SeqColDruggabilityParamTypes.value, ["true", "false"], "true", "equal", "avg")
            for x in TBDream.drugs
        ]

        # Remove the features of a previous load before adding new ones.
        Protein.objects(organism=organism).update(__raw__={"$pull": {"features": {"type": "tbdream"}}})
        collection = SeqCollection.objects(name=organism).get()
        for name, description, target, _type, options, defaultValue, defaultOperation, defaultGroupOperation in search_params:
            Protein.objects(organism=organism).update(__raw__={"$set": {"search." + name: False}})
            if not collection.has_druggability_param(name):
                dp = SeqColDruggabilityParam(name=name, description=description, target=target,
                                             type=_type, uploader="demo")
                dp.options = options
                dp.defaultValue = defaultValue
                dp.defaultOperation = defaultOperation
                dp.defaultGroupOperation = defaultGroupOperation
                collection.druggabilityParams.append(dp)
        collection.save()

        for rv, rows in self._df.groupby("rv"):
            # Only the first matching protein is annotated; first() avoids
            # loading every match just to take one.
            prot = Protein.objects(organism=organism, gene__iexact=rv).first()
            if prot is None:
                continue

            for _, r in rows.iterrows():
                mut = None
                if r.change:
                    change = str(r.change[0]) + "/" + str(r.change[1])
                    mut = SeqUtils.seq1(r.change[1])
                else:
                    change = r.AminoAcid

                # Without a codon, the AminoAcid column is tried as the
                # position.
                if math.isnan(r.codon):
                    try:
                        pos = int(r.AminoAcid)
                    except (TypeError, ValueError, OverflowError):
                        _log.warning("couldnt find the variant position")
                        continue
                else:
                    pos = int(r.codon)

                # RTotalIsolates is parsed as "<resistant>/<total>"; anything
                # that does not fit that shape leaves both values empty.
                try:
                    r_div_total = r.RTotalIsolates.strip()
                    res, t = r_div_total.split("/")
                    r_div_total_coef = int(res) * 1.0 / int(t)
                except (AttributeError, TypeError, ValueError, ZeroDivisionError):
                    r_div_total = None
                    r_div_total_coef = None

                quals = {
                    "drug": r.Drug,
                    "change": change,
                    "gene": r.GeneID,
                    "pattern": r.ResistancePattern,
                    "additional": r.AdditionalMutations,
                    "r_div_total": r_div_total,
                    "r_div_total_coef": r_div_total_coef,
                    "mic": r.MIC}
                if mut:
                    quals["mut"] = mut
                fvariant = Feature(_id=ObjectId(), location=Location(start=pos, end=pos), type="tbdream",
                                   identifier="TBDream id " + r.ID,
                                   qualifiers=quals)
                prot.features.append(fvariant)
                prot.search.resistance = True
                prot.search[r.Drug] = True
            prot.save()