示例#1
0
    def _import_protein_details(self, ms_job):
        """Store one protein-detail record for every row of ``self.protein_details``.

        The protein a row belongs to is looked up (not created here) by the
        slugified ``ID`` column. ``ms_job`` is accepted but not used by this
        step.
        """
        for row in self.protein_details:
            if hasattr(self, "notify_progress"):
                message = "EasyProt: Importing Protein Details ({})".format(
                    row["ID"])
                self.current += 1
                self.notify_progress(current=self.current,
                                     total=self.total,
                                     message=message)

            species_proteins = cmodels.MassSpectrometryProtein.objects.for_species(
                self.species)
            ref_protein = species_proteins.for_wid(slugify(row["ID"]))

            # The position column is split on "-" into exactly two integers;
            # the stored length is the difference between them.
            start, end = map(int, row["Position (to mature prot.)"].split("-"))

            cmodels.MassSpectrometryProteinDetail.objects.get_or_create_with_revision(
                self.detail,
                protein=ref_protein,
                sequence=row["Sequence"],
                sequence_ptm=row["Sequence + PTMs"],
                coordinate=start,
                length=end - start,
                proteotypic=row["Proteotypic"],
                zscore=row["z-score"],
                delta_mass=row["Delta Mass (ppm)"],
                mass=row["Experimental Mass (m/z)"],
                charge=row["Charge"],
                retention_time=row["Retention Time (min)"],
                theoretical_mass=row["Theoretical Mass (Da)"],
                missed_cleavages=row["Missed Cleavages"])
示例#2
0
    def handle(self, *args, **options):
        """Validate the shared command options, then run ``handle_command``.

        Reads ``wid``, ``reason`` and ``user`` from ``options``, resolves the
        species, saves a ``RevisionDetail`` carrying the user and the reason,
        and forwards everything to ``self.handle_command``.

        Raises:
            CommandError: if ``wid`` or ``reason`` is empty, if ``wid`` is not
                already in slug form, or if the species does not exist while
                ``self.verify_species_exists`` is true.
        """
        if not options["wid"]:
            raise CommandError("wid argument is mandatory")

        if not options["reason"]:
            raise CommandError("reason is mandatory")

        wid = slugify(options["wid"])
        reason = options["reason"]

        # The wid must already be a slug; it is never silently rewritten.
        if options["wid"] != wid:
            raise CommandError(
                "Wid {} contained invalid characters. Only letters, numbers and _ are allowed"
                .format(options["wid"]))

        try:
            species_obj = Species.objects.get(wid=wid)
        except Species.DoesNotExist:
            # Only a missing row means "unknown species". The previous bare
            # ``except`` also swallowed database errors, duplicate rows and
            # KeyboardInterrupt and reported all of them as "not found".
            if self.verify_species_exists:
                raise CommandError("Species {} not found".format(wid))
            # Unsaved instance; nothing is written to the database here.
            species_obj = Species(wid=wid)

        # Fall back to the default account when no user was given.
        if not options["user"]:
            options["user"] = "******"

        revdetail = RevisionDetail()
        revdetail.user = UserProfile.objects.get(
            user__username=options["user"])
        revdetail.reason = reason
        revdetail.save()

        self.handle_command(species_obj, revdetail, *args, **options)
示例#3
0
    def _import_target_peptides(self, ms_job):
        """Create a Peptide entry under ``ms_job`` for every target peptide.

        Every peptide is tagged with the ``Target-Peptide`` type and linked
        to one text entry per comma-separated name in its
        ``Matched Proteins`` column.
        """
        target_type = cmodels.Type.objects.for_wid("Target-Peptide",
                                                   create=True)
        target_type.species = self.species
        target_type.save(self.detail)

        for number, item in enumerate(self.target_peptides, start=1):
            matched = item["Matched Proteins"]

            if hasattr(self, "notify_progress"):
                message = "EasyProt: Importing Target Peptide ({})".format(
                    matched)
                self.current += 1
                self.notify_progress(current=self.current,
                                     total=self.total,
                                     message=message)

            # The 1-based row number keeps wids unique when several rows
            # share the same matched proteins.
            species_peptides = cmodels.Peptide.objects.for_species(
                self.species)
            peptide = species_peptides.for_wid(
                "{}-{}".format(number, slugify(matched)), create=True)

            peptide.parent = ms_job
            peptide.sequence = item["Sequence"]
            peptide.length = len(item["Sequence"])
            peptide.proteotypic = item["Proteotypic"]
            peptide.charge = item["Charge"]
            peptide.mass = item["m/z"]
            peptide.zscore = item["zscore"]
            peptide.retention_time = item["RT"]
            peptide.species = self.species
            # Saved before the relations below are added.
            peptide.save(self.detail)
            peptide.type.add(target_type)

            for protein_name in matched.split(","):
                text_entry = cmodels.EntryBasicTextData.objects.get_or_create_with_revision(
                    self.detail, value=protein_name.strip())
                peptide.proteins.add(text_entry)
 def handle(self, *args, **options):
     """Validate the shared command options, then run ``handle_command``.

     Reads ``wid``, ``reason`` and ``user`` from ``options``, resolves the
     species, saves a ``RevisionDetail`` carrying the user and the reason,
     and forwards everything to ``self.handle_command``.

     Raises:
         CommandError: if ``wid`` or ``reason`` is empty, if ``wid`` is not
             already in slug form, or if the species does not exist while
             ``self.verify_species_exists`` is true.
     """
     if not options["wid"]:
         raise CommandError("wid argument is mandatory")

     if not options["reason"]:
         raise CommandError("reason is mandatory")

     wid = slugify(options["wid"])
     reason = options["reason"]

     # The wid must already be a slug; it is never silently rewritten.
     if options["wid"] != wid:
         raise CommandError("Wid {} contained invalid characters. Only letters, numbers and _ are allowed".format(options["wid"]))

     try:
         species_obj = Species.objects.get(wid=wid)
     except Species.DoesNotExist:
         # Only a missing row means "unknown species". The previous bare
         # ``except`` also swallowed database errors, duplicate rows and
         # KeyboardInterrupt and reported all of them as "not found".
         if self.verify_species_exists:
             raise CommandError("Species {} not found".format(wid))
         # Unsaved instance; nothing is written to the database here.
         species_obj = Species(wid=wid)

     # Fall back to the default account when no user was given.
     if not options["user"]:
         options["user"] = "******"

     revdetail = RevisionDetail()
     revdetail.user = UserProfile.objects.get(user__username=options["user"])
     revdetail.reason = reason
     revdetail.save()

     self.handle_command(species_obj, revdetail, *args, **options)
 def try_slugify(self, name, not_slug):
     """Return ``not_slug`` slugified, insisting that it already was a slug.

     ``name`` only labels the value in the error message.

     Raises:
         ValueError: if slugifying changes ``not_slug``.
     """
     cleaned = slugify(not_slug)

     if cleaned == not_slug:
         return cleaned

     raise ValueError("{} {} contained invalid characters. Only letters, numbers and _ are allowed".format(name, not_slug))
示例#6
0
    def _import_protein_summary(self, ms_job):
        """Create or update one MassSpectrometryProtein per summary row.

        For every row of ``self.protein_summary`` the protein is saved under
        ``ms_job`` and linked to its UniProt accession, its GO terms and the
        text entries for its ambiguous and sub proteins.

        Raises:
            ValueError: if a non-blank GO term does not have the form
                ``name (TYPE:identifier)``.
        """
        # One term looks like "name (TYPE:identifier)".
        go_term_pattern = re.compile(r"^(.*) \((.*):(.*)\)$")

        # Import the protein data from the file
        for row in self.protein_summary:
            if hasattr(self, "notify_progress"):
                outstr = "EasyProt: Importing Protein Summary ({})".format(
                    row["ID"])
                self.current += 1
                self.notify_progress(current=self.current,
                                     total=self.total,
                                     message=outstr)

            # :type protein: cmodels.MassSpectrometryProtein
            protein = cmodels.MassSpectrometryProtein.objects.for_species(
                self.species).for_wid(slugify(row["ID"]), create=True)
            protein.comments = row["Description"]

            uniprot = cmodels.CrossReference.objects.get_or_create_with_revision(
                self.detail, source="UniProt", xid=row["AC"])

            protein.score = row["Protein Score"]
            protein.coverage = row["% Coverage"]
            protein.sequence = row["Protein Seq"]
            protein.length = len(protein.sequence)

            # #PSMs -> Peptide spectrum match -> Entries in Details
            # #Peptides -> Count number of different sequences(?) in Details

            protein.pi = row["Protein PI"]
            protein.mass = row["Protein Mass (Da)"]

            protein.parent = ms_job
            protein.species = self.species
            # Saved before the relations below are added.
            protein.save(self.detail)
            protein.cross_references.add(uniprot)

            # GO terms are separated by ";".
            go_term_row = row["GO terms"]
            if go_term_row:
                for raw_term in go_term_row.split(";"):
                    # Surrounding blanks (e.g. after a trailing "; ") used to
                    # make the pattern fail and crash on ``None.groups()``.
                    term = raw_term.strip()
                    if not term:
                        continue

                    match = go_term_pattern.match(term)
                    if match is None:
                        raise ValueError(
                            "Malformed GO term {!r} for protein {}".format(
                                term, row["ID"]))

                    # No field for name :/
                    _name, typ, identifier = match.groups()
                    go = cmodels.CrossReference.objects.get_or_create_with_revision(
                        self.detail, source=typ, xid=identifier)
                    protein.cross_references.add(go)

            if row["#Ambiguous Prots"] > 0:
                for amb_protein in row["Ambiguous Prots"].split(","):
                    prot = cmodels.EntryBasicTextData.objects.get_or_create_with_revision(
                        self.detail, value=amb_protein.strip())
                    protein.ambiguous.add(prot)

            if row["#Sub-Prots"] > 0:
                for sub_protein in row["Sub-Prots"].split(","):
                    prot = cmodels.EntryBasicTextData.objects.get_or_create_with_revision(
                        self.detail, value=sub_protein.strip())
                    protein.sub.add(prot)
    def parse(self, handle):
        """Read an InterProScan XML file and collect features per protein.

        Fills ``self.protein_monomers_cf``, ``self.xrefs``, ``self.types``
        and ``self.feature_positions``. Proteins whose wid is not found for
        ``self.species`` are skipped.
        """
        if hasattr(self, "notify_progress"):
            self.notify_progress(current=0, total=1, message="Parsing InterProScan file...")

        # Remove xmlns namespace, makes working with ElementTree more complicated
        document = re.sub(' xmlns="[^"]+"', '', handle.read(), count=1)

        proteins = ET.fromstring(document).findall("protein")
        protein_count = len(proteins)

        for position, protein in enumerate(proteins, start=1):
            # The protein wid is the part of the xref id before the first "|".
            protein_wid = protein.find("xref").get("id").split("|", 2)[0]

            self.total = protein_count

            if hasattr(self, "notify_progress"):
                progress_text = "Parsing features of {} ({}/{})".format(
                    protein_wid, position, self.total)
                self.notify_progress(current=position, total=self.total, message=progress_text)

            try:
                protein_item = cmodels.ProteinMonomer.objects.for_species(self.species).for_wid(protein_wid)
            except ObjectDoesNotExist:
                # ToDo: Error reporting?
                continue
            self.protein_monomers_cf[protein_item] = []

            for matches in protein.findall("matches"):
                for match in matches:
                    signature = match.find("signature")
                    feature_wid = slugify(signature.get("ac"))

                    feature = cmodels.ChromosomeFeature(wid=feature_wid)
                    feature.name = signature.get("name") or ""
                    feature.comments = signature.get("desc") or ""
                    self.xrefs[feature_wid] = []
                    self.protein_monomers_cf[protein_item].append(feature)
                    # The feature type is the title-cased tag of the match element.
                    self.types[feature_wid] = match.tag.title()

                    for entry in signature.findall("entry"):
                        for entry_xref in entry:
                            self.xrefs[feature_wid].append([entry_xref.get("id"), entry_xref.get("db")])

                    self.feature_positions[feature_wid] = []
                    for location in match.find("locations"):
                        first = int(location.get("start"))
                        last = int(location.get("end"))
                        # A start beyond the end marks a reverse feature;
                        # the bounds are stored in ascending order.
                        if first > last:
                            first, last = last, first
                            direction = "r"
                        else:
                            direction = "f"
                        self.feature_positions[feature_wid].append({
                            "chromosome": protein_item.gene.chromosome_id,
                            "coordinate": first + protein_item.gene.coordinate,
                            "length": last - first,
                            "direction": direction,
                        })
示例#8
0
    def _import_jobs_params(self):
        """Create or fetch the mass-spectrometry job and return it.

        The job wid is the slugified first field of the first entry in
        ``self.export_parameters["jobs"]``; the job name is set to that wid.
        """
        species_jobs = cmodels.MassSpectrometryJob.objects.for_species(
            self.species)
        job_wid = slugify(self.export_parameters["jobs"][0][0])

        ms_job = species_jobs.for_wid(job_wid, create=True)
        ms_job.name = ms_job.wid
        ms_job.species = self.species
        ms_job.save(self.detail)

        return ms_job
    def parse(self, handle):
        """Read FASTA records from ``handle`` and queue one entry per record.

        Each record header is split by ``FastaFeature._parse_header`` into
        wid, start, end and description; the entry appended to ``self.data``
        carries the slugified wid and integer start/end.
        """
        self.report_progress(current=0, total=1, message="Parsing FASTA file")

        for record in SeqIO.parse(handle, "fasta"):
            raw_wid, start, end, description = FastaFeature._parse_header(record.description)
            entry = {
                "wid": slugify(raw_wid),
                "start": int(start),
                "end": int(end),
                "description": description,
            }
            self.data.append(entry)
    def parse(self, handle):
        """Collect the header fields of every FASTA record in ``handle``.

        ``FastaFeature._parse_header`` yields wid, start, end and
        description for a record; they are stored in ``self.data`` with the
        wid slugified and the bounds converted to ``int``.
        """
        self.report_progress(current=0, total=1, message="Parsing FASTA file")

        for record in SeqIO.parse(handle, "fasta"):
            header = FastaFeature._parse_header(record.description)
            raw_wid, start, end, description = header

            self.data.append({
                "wid": slugify(raw_wid),
                "start": int(start),
                "end": int(end),
                "description": description,
            })
示例#11
0
    def _import_protein_summary(self, ms_job):
        """Create or update one MassSpectrometryProtein per summary row.

        For every row of ``self.protein_summary`` the protein is saved under
        ``ms_job`` and linked to its UniProt accession, its GO terms and the
        text entries for its ambiguous and sub proteins.

        Raises:
            ValueError: if a non-blank GO term does not have the form
                ``name (TYPE:identifier)``.
        """
        # One term looks like "name (TYPE:identifier)".
        go_term_pattern = re.compile(r"^(.*) \((.*):(.*)\)$")

        # Import the protein data from the file
        for row in self.protein_summary:
            if hasattr(self, "notify_progress"):
                outstr = "EasyProt: Importing Protein Summary ({})".format(row["ID"])
                self.current += 1
                self.notify_progress(current=self.current, total=self.total, message=outstr)

            # :type protein: cmodels.MassSpectrometryProtein
            protein = cmodels.MassSpectrometryProtein.objects.for_species(self.species).for_wid(slugify(row["ID"]), create=True)
            protein.comments = row["Description"]

            uniprot = cmodels.CrossReference.objects.get_or_create_with_revision(self.detail, source="UniProt", xid=row["AC"])

            protein.score = row["Protein Score"]
            protein.coverage = row["% Coverage"]
            protein.sequence = row["Protein Seq"]
            protein.length = len(protein.sequence)

            # #PSMs -> Peptide spectrum match -> Entries in Details
            # #Peptides -> Count number of different sequences(?) in Details

            protein.pi = row["Protein PI"]
            protein.mass = row["Protein Mass (Da)"]

            protein.parent = ms_job
            protein.species = self.species
            # Saved before the relations below are added.
            protein.save(self.detail)
            protein.cross_references.add(uniprot)

            # GO terms are separated by ";".
            go_term_row = row["GO terms"]
            if go_term_row:
                for raw_term in go_term_row.split(";"):
                    # Surrounding blanks (e.g. after a trailing "; ") used to
                    # make the pattern fail and crash on ``None.groups()``.
                    term = raw_term.strip()
                    if not term:
                        continue

                    match = go_term_pattern.match(term)
                    if match is None:
                        raise ValueError("Malformed GO term {!r} for protein {}".format(term, row["ID"]))

                    # No field for name :/
                    _name, typ, identifier = match.groups()
                    go = cmodels.CrossReference.objects.get_or_create_with_revision(self.detail, source=typ, xid=identifier)
                    protein.cross_references.add(go)

            if row["#Ambiguous Prots"] > 0:
                for amb_protein in row["Ambiguous Prots"].split(","):
                    prot = cmodels.EntryBasicTextData.objects.get_or_create_with_revision(self.detail, value=amb_protein.strip())
                    protein.ambiguous.add(prot)

            if row["#Sub-Prots"] > 0:
                for sub_protein in row["Sub-Prots"].split(","):
                    prot = cmodels.EntryBasicTextData.objects.get_or_create_with_revision(self.detail, value=sub_protein.strip())
                    protein.sub.add(prot)
示例#12
0
    def _import_target_peptides(self, ms_job):
        """Import every target peptide as a Peptide child of ``ms_job``.

        Peptides receive the ``Target-Peptide`` type and one linked text
        entry for each comma-separated name in ``Matched Proteins``.
        """
        target_type = cmodels.Type.objects.for_wid("Target-Peptide", create=True)
        target_type.species = self.species
        target_type.save(self.detail)

        for index, item in enumerate(self.target_peptides):
            matched_proteins = item["Matched Proteins"]

            if hasattr(self, "notify_progress"):
                progress_text = "EasyProt: Importing Target Peptide ({})".format(matched_proteins)
                self.current += 1
                self.notify_progress(current=self.current, total=self.total, message=progress_text)

            # Prefixing the 1-based row number keeps the wid unique per row.
            peptide_wid = "{}-{}".format(index + 1, slugify(matched_proteins))
            peptide = cmodels.Peptide.objects.for_species(self.species).for_wid(peptide_wid, create=True)

            peptide.parent = ms_job
            peptide.sequence = item["Sequence"]
            peptide.length = len(item["Sequence"])
            peptide.proteotypic = item["Proteotypic"]
            peptide.charge = item["Charge"]
            peptide.mass = item["m/z"]
            peptide.zscore = item["zscore"]
            peptide.retention_time = item["RT"]
            peptide.species = self.species
            # Saved before the relations below are added.
            peptide.save(self.detail)
            peptide.type.add(target_type)

            for protein_name in matched_proteins.split(","):
                linked = cmodels.EntryBasicTextData.objects.get_or_create_with_revision(self.detail, value=protein_name.strip())
                peptide.proteins.add(linked)
示例#13
0
    def _import_jobs_params(self):
        """Return the mass-spectrometry job, creating it when necessary.

        Its wid comes from slugifying ``self.export_parameters["jobs"][0][0]``
        and its name is set to that wid.
        """
        job_manager = cmodels.MassSpectrometryJob.objects.for_species(self.species)
        wid = slugify(self.export_parameters["jobs"][0][0])

        ms_job = job_manager.for_wid(wid, create=True)
        ms_job.name = ms_job.wid
        ms_job.species = self.species
        ms_job.save(self.detail)

        return ms_job
    def parse(self, handle):
        """Parse InterProScan XML from ``handle`` into the importer's tables.

        For each protein found for ``self.species`` the method records its
        features in ``self.protein_monomers_cf`` and, keyed by feature wid,
        the cross references (``self.xrefs``), type (``self.types``) and
        positions (``self.feature_positions``). Unknown proteins are skipped.
        """
        if hasattr(self, "notify_progress"):
            self.notify_progress(current=0,
                                 total=1,
                                 message="Parsing InterProScan file...")

        raw_xml = handle.read()
        # Remove xmlns namespace, makes working with ElementTree more complicated
        raw_xml = re.sub(' xmlns="[^"]+"', '', raw_xml, count=1)

        all_proteins = ET.fromstring(raw_xml).findall("protein")
        all_proteins_len = len(all_proteins)

        for index, protein in enumerate(all_proteins):
            number = index + 1
            # The protein wid is the part of the xref id before the first "|".
            protein_wid = protein.find("xref").get("id").split("|", 2)[0]

            self.total = all_proteins_len

            if hasattr(self, "notify_progress"):
                self.notify_progress(
                    current=number,
                    total=self.total,
                    message="Parsing features of {} ({}/{})".format(
                        protein_wid, number, self.total))

            try:
                protein_item = cmodels.ProteinMonomer.objects.for_species(
                    self.species).for_wid(protein_wid)
            except ObjectDoesNotExist:
                # ToDo: Error reporting?
                continue
            self.protein_monomers_cf[protein_item] = []

            for matches in protein.findall("matches"):
                for match in matches:
                    signature = match.find("signature")
                    feature_wid = slugify(signature.get("ac"))

                    chromosome_feature = cmodels.ChromosomeFeature(
                        wid=feature_wid)
                    chromosome_feature.name = signature.get("name") or ""
                    chromosome_feature.comments = signature.get("desc") or ""
                    self.xrefs[feature_wid] = []
                    self.protein_monomers_cf[protein_item].append(
                        chromosome_feature)
                    # The feature type is the title-cased tag of the match element.
                    self.types[feature_wid] = match.tag.title()

                    for entry in signature.findall("entry"):
                        for reference in entry:
                            pair = [reference.get("id"), reference.get("db")]
                            self.xrefs[feature_wid].append(pair)

                    self.feature_positions[feature_wid] = []
                    for location in match.find("locations"):
                        start = int(location.get("start"))
                        end = int(location.get("end"))
                        # A start beyond the end marks a reverse feature; the
                        # smaller bound is always used as the offset.
                        reverse = start > end
                        lower = end if reverse else start
                        upper = start if reverse else end
                        position = {
                            "chromosome": protein_item.gene.chromosome_id,
                            "coordinate": lower + protein_item.gene.coordinate,
                            "length": upper - lower,
                            "direction": "r" if reverse else "f",
                        }
                        self.feature_positions[feature_wid].append(position)
示例#15
0
    def apply(self):
        """Write the parsed BioPython record (``self.record``) to the database.

        Steps, in order:

        1. Save the revision detail and the species.
        2. Create or update the chromosome (or plasmid) with name, sequence
           and length.
        3. Attach cross references from the record's dbxrefs, the ``gi``
           annotation and the leading ``source`` feature.
        4. Create or update publication references from the annotations.
        5. Import every CDS/ncRNA/rRNA/tmRNA/tRNA feature whose locus tag
           also has a ``gene`` feature as a Gene, plus a ProteinMonomer when
           a ``protein_id`` qualifier is present.
        6. Assign KEGG pathways.

        Raises:
            ValueError: if a locus tag occurs twice among the gene features
                or twice among the CDS/RNA features.
        """
        self.detail.save()

        self.species.save(self.detail)

        obj = cmodels.Chromosome if self.is_chromosome else cmodels.Plasmid
        chromosome = obj.objects.for_species(self.species).for_wid(
            self.chromosome, create=True)
        chromosome.name = self.name
        chromosome.sequence = str(
            self.record.seq)  # Cast needed, otherwise revision-compare fails!
        chromosome.length = len(self.record.seq)
        chromosome.species = self.species
        chromosome.save(self.detail)

        if self.record.dbxrefs:
            for xref in self.record.dbxrefs:
                # BioPython doesnt always properly split the db xrefs
                for x in xref.split(" "):
                    if ":" in x:
                        # Split on the first colon only: ids such as
                        # "HGNC:HGNC:5" contain further colons and used to
                        # fail the two-value unpacking.
                        source, xid = x.split(":", 1)
                        x = cmodels.CrossReference.objects.get_or_create_with_revision(
                            self.detail, source=source, xid=xid)
                        chromosome.cross_references.add(x)

        if "references" in self.annotation:
            for ref in self.annotation["references"]:
                # calculate the wid
                if ref.pubmed_id:
                    wid = "PUB_" + ref.pubmed_id
                    name = "Pubmed #" + ref.pubmed_id
                elif ref.medline_id:
                    wid = "MED_" + ref.medline_id
                    # NOTE(review): the name says "Pubmed" for a Medline id;
                    # kept as is, confirm whether this is intended.
                    name = "Pubmed #" + ref.medline_id
                else:
                    # No external id: reuse an identical stored publication
                    # or allocate the next REF_nnnn wid.
                    publication = cmodels.PublicationReference.objects.filter(
                        authors__exact=ref.authors,
                        title__exact=ref.title,
                        publication__exact=ref.journal)
                    if publication.exists():
                        wid = publication[0].wid
                        name = publication[0].name
                    else:
                        refs = cmodels.PublicationReference.objects.filter(
                            wid__startswith="REF_")
                        if refs.exists():
                            # NOTE(review): reverse() only yields the highest
                            # REF_ wid if this queryset has a defined
                            # ordering - confirm the model's default ordering.
                            last = refs.reverse()[0]
                            next_id = int(last.wid[4:], 10) + 1

                            wid = "REF_" + "%04d" % (next_id)
                            name = "Reference #%04d" % (next_id)
                        else:
                            wid = "REF_0001"
                            name = "Reference #0001"

                pubref = cmodels.PublicationReference.objects.for_wid(
                    slugify(wid), create=True)
                pubref.name = name
                pubref.authors = ref.authors
                pubref.title = ref.title
                pubref.publication = ref.journal
                pubref.species = self.species
                pubref.save(self.detail)

                if ref.pubmed_id:
                    xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                        self.detail, source="PUBMED", xid=ref.pubmed_id)
                    pubref.cross_references.add(xref)

                if ref.medline_id:
                    xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                        self.detail, source="MEDLINE", xid=ref.medline_id)
                    pubref.cross_references.add(xref)

                chromosome.publication_references.add(pubref)

        if "gi" in self.annotation:
            xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                self.detail, xid=self.annotation["gi"], source="GI")
            chromosome.cross_references.add(xref)

        features = self.record.features

        # Only a leading "source" feature contributes chromosome xrefs.
        if len(features) > 0:
            if features[0].type == "source":
                if "db_xref" in features[0].qualifiers:
                    for xref in features[0].qualifiers["db_xref"]:
                        if ":" in xref:
                            source, xid = xref.split(":", 1)
                            xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                                self.detail, source=source, xid=xid)
                            chromosome.cross_references.add(xref)

        cds_types = ("CDS", "ncRNA", "rRNA", "tmRNA", "tRNA")
        gene_features = [f for f in features if f.type == "gene"]
        cds_features = [f for f in features if f.type in cds_types]

        gene_map = {}
        for g in gene_features:
            if "locus_tag" not in g.qualifiers:
                self.stderr.write("WARN: " + str(g) + " without locus")
                continue
            loci = g.qualifiers["locus_tag"][0]
            if loci in gene_map:
                raise ValueError("locus_tag " + loci + " appeared twice")
            gene_map[loci] = g

        # Keep only CDS/RNA features whose locus tag has a gene feature.
        cds_map = {}
        for c in cds_features:
            if "locus_tag" not in c.qualifiers:
                self.stderr.write("WARN: " + str(c) + " without locus")
                continue
            loci = c.qualifiers["locus_tag"][0]
            if loci in cds_map:
                raise ValueError("locus_tag " + loci + " appeared twice")
            if loci in gene_map:
                cds_map[loci] = c

        sorted_cds_values = sorted(cds_map.values(),
                                   key=lambda x: x.qualifiers["locus_tag"])
        total_genes = len(cds_map)

        for i, v in enumerate(sorted_cds_values):

            qualifiers = v.qualifiers

            # The first feature carrying a translation table sets the
            # species' genetic code if it is still unset.
            if not self.species.genetic_code:
                if "transl_table" in qualifiers:
                    self.species.genetic_code = qualifiers["transl_table"][0]
                    self.species.save(self.detail)

            g = cmodels.Gene.objects.for_species(self.species).for_wid(
                slugify(qualifiers["locus_tag"][0]), create=True)

            if hasattr(self, "notify_progress"):
                outstr = "Importing Gene %s (%d/%d)" % (g.wid, i + 1,
                                                        total_genes)
                self.notify_progress(current=i + 1,
                                     total=total_genes,
                                     message=outstr)

            g.chromosome = chromosome

            if "gene" in qualifiers:
                g.name = qualifiers["gene"][0]
                g.symbol = qualifiers["gene"][0]

            g.direction = 'f' if v.location.strand == 1 else 'r'

            # __len__ because len() fails for numbers < 0
            # Joins output the wrong length
            if v.location.__len__() < 0:
                g.length = v.location.__len__() + len(self.record.seq)
            else:
                g.length = len(v.location)

            # NOTE(review): this used to read ``start + 1 if 'f' else start``.
            # The literal 'f' is always truthy, so start + 1 was stored for
            # both strands. That behavior is kept; confirm whether a
            # per-direction offset was intended before changing it.
            g.coordinate = v.location.start + 1

            if "note" in qualifiers:
                g.comments = "\n".join(qualifiers["note"])

            g.species = self.species
            g.save(self.detail)

            if "db_xref" in qualifiers:
                for xref in qualifiers["db_xref"]:
                    if ":" in xref:
                        source, xid = xref.split(":", 1)
                        xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                            self.detail, source=source, xid=xid)
                        g.cross_references.add(xref)

            if "EC_number" in qualifiers:
                for ec in qualifiers["EC_number"]:
                    xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                        self.detail, source="EC", xid=ec)
                    g.cross_references.add(xref)

            if "gene_synonym" in qualifiers:
                for synonym in qualifiers["gene_synonym"]:
                    # Inconsistency: Multiple synonyms appear in one entry,
                    # why don't they split them like for all other items?
                    for syn in synonym.split(";"):
                        obj = cmodels.Synonym.objects.get_or_create_with_revision(
                            self.detail, name=syn.strip())
                        g.synonyms.add(obj)

            if "protein_id" in qualifiers:
                protxref = qualifiers["protein_id"][0]
                wid = slugify(g.wid + "_Monomer")

                protein = cmodels.ProteinMonomer.objects.for_species(
                    self.species).for_wid(wid, create=True)

                if "product" in qualifiers:
                    protein.name = qualifiers["product"][0]

                xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                    self.detail, source="RefSeq", xid=protxref)

                protein.gene = g
                protein.species = self.species
                protein.save(self.detail)

                protein.cross_references.add(xref)

            # CDS features are stored under the "mRNA" type; note that this
            # rewrites the type on the feature object itself.
            if v.type == "CDS":
                v.type = "mRNA"

            t = cmodels.Type.objects.for_wid(wid=slugify(v.type), create=True)
            t.name = v.type

            t.species = self.species
            t.save(self.detail)

            g.type.add(t)

        if hasattr(self, "notify_progress"):
            outstr = "Assigning KEGG pathways"
            self.notify_progress(current=total_genes,
                                 total=total_genes,
                                 message=outstr)

        cmodels.Pathway.add_kegg_pathway(self.species, self.detail)
示例#16
0
    def apply(self):
        self.detail.save()
        
        total = len(self.compartments) + len(self.sbml_species) + len(self.reactions)

        # Compartment importer
        for i, compartment in enumerate(self.compartments):
            wid = slugify(compartment.getId())
            
            if compartment.getName():
                name = compartment.getName()
            else:
                name = wid

            if hasattr(self, "notify_progress"):
                out_str = "Importing Compartment %s (%d/%d)" % (wid, i + 1, total)
                self.notify_progress(current = i+1, total = total, message = out_str)
            
            # TODO: compartment.getOutside() not implemented
            cobj = cmodels.Compartment.objects.for_species(self.species).for_wid(wid, create = True)
            
            cobj.name = name
            cobj.species = self.species
            cobj.save(self.detail)

        # Species (= Metabolites) importer
        for i, specie in enumerate(self.sbml_species):
            if not self.model.getCompartment(specie.getCompartment()):
                ##self.stderr.write("WARN: Species {} has invalid compartment {}".format(specie.id, specie.getCompartment()))
                continue
                
            wid = slugify(specie.getId())
            if specie.getName():
                name = specie.getName()
            else:
                name = wid
            
            if hasattr(self, "notify_progress"):
                current = len(self.compartments) + i + 1
                out_str = "Importing Metabolite %s (%d/%d)" % (wid, current, total)
                self.notify_progress(current = current, total = total, message = out_str)
            
            # TODO: specie.getBoundaryCondition() not implemented
            sobj = cmodels.Metabolite.objects.for_species(self.species).for_wid(wid, create = True)
            
            sobj.name = name
            sobj.charge = 0 # TODO
            sobj.is_hydrophobic = False # TODO
            sobj.species = self.species
            sobj.save(self.detail)

        for i, reaction in enumerate(self.reactions):
            wid = slugify(reaction.getId())
            if reaction.getName():
                name = reaction.getName()
            else:
                name = wid
                
            valid = False
            
            if hasattr(self, "notify_progress"):
                current = len(self.compartments) + len(self.sbml_species) + i + 1
                out_str = "Importing Reaction %s (%d/%d)" % (wid, current, total)
                self.notify_progress(current = current, total = total, message = out_str)
            
            # Validation of reactants
            reactants = map(lambda i: reaction.getReactant(i), range(len(reaction.getListOfReactants())))
            products = map(lambda i: reaction.getProduct(i), range(len(reaction.getListOfProducts())))
            
            for reactant in reactants:
                if not self.model.getSpecies(reactant.getSpecies()):
                    ##self.stderr.write("WARN: Reactant {} has invalid species {}".format(reactant.id, reactant.species))
                    break
            else:
                # Validation of products
                for product in products:
                    if not self.model.getSpecies(product.getSpecies()):
                        ##self.stderr.write("WARN: Product {} has invalid species {}".format(product.id, product.species))
                        break
                else:
                    # Validation passed
                    valid = True
            
            if valid:
                reaction_obj = cmodels.Reaction.objects.for_species(self.species).for_wid(wid, create = True)
                
                reaction_obj.name = name
                reaction_obj.direction = 'r' if reaction.getReversible() else 'f'
                reaction_obj.is_spontaneous = False  # TODO
                reaction_obj.species = self.species
                reaction_obj.save(self.detail)

                for reactant in reactants:
                    #try:
                    #    participant_obj = cmodels.ReactionStoichiometryParticipant.objects.get(wid = wid)
                    #except ObjectDoesNotExist:
                    #    participant_obj = cmodels.ReactionStoichiometryParticipant(wid = wid)

                    participant_obj = cmodels.ReactionStoichiometryParticipant()
                    participant_obj.molecule = cmodels.Metabolite.objects.for_species(self.species).for_wid(slugify(reactant.getSpecies()))
                    participant_obj.coefficient = -reactant.getStoichiometry()
                    participant_obj.compartment = cmodels.Compartment.objects.for_species(self.species).for_wid(slugify(self.model.getSpecies(reactant.getSpecies()).getCompartment()))
                    participant_obj.save(self.detail)

                    reaction_obj.stoichiometry.add(participant_obj)

                for product in products:
                    #try:
                    #    participant_obj = cmodels.ReactionStoichiometryParticipant.objects.get(wid = wid)
                    #except ObjectDoesNotExist:
                    #    participant_obj = cmodels.ReactionStoichiometryParticipant(wid = wid)

                    participant_obj = cmodels.ReactionStoichiometryParticipant()
                    participant_obj.molecule = cmodels.Metabolite.objects.for_species(self.species).for_wid(slugify(product.getSpecies()))
                    participant_obj.coefficient = product.getStoichiometry()
                    participant_obj.compartment = cmodels.Compartment.objects.for_species(self.species).for_wid(slugify(self.model.getSpecies(product.getSpecies()).getCompartment()))
                    participant_obj.detail = self.detail
                    participant_obj.save(self.detail)

                    reaction_obj.stoichiometry.add(participant_obj)
示例#17
0
    def _import_protein_details(self, ms_job):
        """Store one protein-detail record per row of ``self.protein_details``.

        For every row the MassSpectrometryProtein is looked up by the
        slugified ``ID`` column, the "Position (to mature prot.)" column is
        parsed as two dash-separated integers, and the remaining columns are
        handed unchanged to ``get_or_create_with_revision``. The stored length
        is the second position value minus the first.

        When the instance provides ``notify_progress``, ``self.current`` is
        incremented and a progress message is emitted for each row.

        ``ms_job`` is accepted but not used by this method.
        """
        for entry in self.protein_details:
            protein_id = entry["ID"]

            if hasattr(self, "notify_progress"):
                message = "EasyProt: Importing Protein Details ({})".format(protein_id)
                self.current += 1
                self.notify_progress(current=self.current, total=self.total, message=message)

            protein_lookup = cmodels.MassSpectrometryProtein.objects.for_species(self.species)
            ref_protein = protein_lookup.for_wid(slugify(protein_id))

            # The position column holds two integers separated by a dash.
            first_pos, second_pos = (int(part) for part in entry["Position (to mature prot.)"].split("-"))

            store_detail = cmodels.MassSpectrometryProteinDetail.objects.get_or_create_with_revision
            detail_fields = {
                "protein": ref_protein,
                "sequence": entry["Sequence"],
                "sequence_ptm": entry["Sequence + PTMs"],
                "coordinate": first_pos,
                "length": second_pos - first_pos,
                "proteotypic": entry["Proteotypic"],
                "zscore": entry["z-score"],
                "delta_mass": entry["Delta Mass (ppm)"],
                "mass": entry["Experimental Mass (m/z)"],
                "charge": entry["Charge"],
                "retention_time": entry["Retention Time (min)"],
                "theoretical_mass": entry["Theoretical Mass (Da)"],
                "missed_cleavages": entry["Missed Cleavages"],
            }
            store_detail(self.detail, **detail_fields)
示例#18
0
    def apply(self):
        """Persist the parsed sequence record and its annotations.

        Steps, in order:

        1. Save the revision detail and the species.
        2. Create or update the Chromosome/Plasmid entry (chosen by
           ``self.is_chromosome``) with the record's sequence and length.
        3. Attach cross references taken from the record's ``dbxrefs``, the
           ``gi`` annotation and the ``db_xref`` qualifiers of a leading
           ``source`` feature.
        4. Create or update one PublicationReference per entry in the
           ``references`` annotation and link it to the chromosome.
        5. Import one Gene for every CDS/ncRNA/rRNA/tmRNA/tRNA feature whose
           ``locus_tag`` also appears on a ``gene`` feature, including its
           cross references, synonyms, type and (when a ``protein_id``
           qualifier exists) a ProteinMonomer.
        6. Call ``cmodels.Pathway.add_kegg_pathway`` for the species.

        Progress is reported through ``self.notify_progress`` when the
        instance provides that attribute.

        Raises:
            ValueError: if a ``locus_tag`` occurs on more than one ``gene``
                feature, or on more than one accepted CDS/RNA feature.
        """

        def link_cross_reference(target, text):
            """Attach a ``SOURCE:ID`` cross reference to ``target``.

            Strings without a colon are ignored.
            """
            if ":" not in text:
                return
            # Split on the first colon only: identifiers that themselves
            # contain a colon (e.g. "HGNC:HGNC:5") used to raise ValueError
            # when unpacking the result of an unbounded split.
            source, xid = text.split(":", 1)
            xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                self.detail, source=source, xid=xid
            )
            target.cross_references.add(xref)

        self.detail.save()

        self.species.save(self.detail)

        replicon_model = cmodels.Chromosome if self.is_chromosome else cmodels.Plasmid
        chromosome = replicon_model.objects.for_species(self.species).for_wid(self.chromosome, create=True)
        chromosome.name = self.name
        chromosome.sequence = str(self.record.seq)  # Cast needed, otherwise revision-compare fails!
        chromosome.length = len(self.record.seq)
        chromosome.species = self.species
        chromosome.save(self.detail)

        if self.record.dbxrefs:
            for raw_xref in self.record.dbxrefs:
                # BioPython doesn't always properly split the db xrefs, so a
                # single entry may hold several space-separated references.
                for token in raw_xref.split(" "):
                    link_cross_reference(chromosome, token)

        if "references" in self.annotation:
            for ref in self.annotation["references"]:
                # Derive the wid and display name of the publication.
                if ref.pubmed_id:
                    wid = "PUB_" + ref.pubmed_id
                    name = "Pubmed #" + ref.pubmed_id
                elif ref.medline_id:
                    wid = "MED_" + ref.medline_id
                    # NOTE(review): the label reads "Pubmed" although the id is
                    # a MEDLINE id; kept unchanged because the name is stored.
                    name = "Pubmed #" + ref.medline_id
                else:
                    # No external id: reuse a publication with identical
                    # authors/title/journal, otherwise allocate a REF_ number.
                    known = cmodels.PublicationReference.objects.filter(
                        authors__exact=ref.authors, title__exact=ref.title, publication__exact=ref.journal
                    )
                    if known.exists():
                        match = known[0]
                        wid = match.wid
                        name = match.name
                    else:
                        numbered = cmodels.PublicationReference.objects.filter(wid__startswith="REF_")
                        if numbered.exists():
                            # NOTE(review): relies on the queryset's default
                            # ordering to find the highest REF_ number - confirm.
                            last = numbered.reverse()[0]
                            next_id = int(last.wid[4:], 10) + 1

                            wid = "REF_%04d" % next_id
                            name = "Reference #%04d" % next_id
                        else:
                            wid = "REF_0001"
                            name = "Reference #0001"

                pubref = cmodels.PublicationReference.objects.for_wid(slugify(wid), create=True)
                pubref.name = name
                pubref.authors = ref.authors
                pubref.title = ref.title
                pubref.publication = ref.journal
                pubref.species = self.species
                pubref.save(self.detail)

                if ref.pubmed_id:
                    xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                        self.detail, source="PUBMED", xid=ref.pubmed_id
                    )
                    pubref.cross_references.add(xref)

                if ref.medline_id:
                    xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                        self.detail, source="MEDLINE", xid=ref.medline_id
                    )
                    pubref.cross_references.add(xref)

                chromosome.publication_references.add(pubref)

        if "gi" in self.annotation:
            xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                self.detail, xid=self.annotation["gi"], source="GI"
            )
            chromosome.cross_references.add(xref)

        features = self.record.features

        # A leading "source" feature may carry cross references for the whole
        # sequence.
        if len(features) > 0 and features[0].type == "source":
            if "db_xref" in features[0].qualifiers:
                for raw_xref in features[0].qualifiers["db_xref"]:
                    link_cross_reference(chromosome, raw_xref)

        # Index gene features by locus tag.
        gene_map = {}
        for feature in features:
            if feature.type != "gene":
                continue
            if "locus_tag" not in feature.qualifiers:
                self.stderr.write("WARN: " + str(feature) + " without locus")
                continue
            locus = feature.qualifiers["locus_tag"][0]
            if locus in gene_map:
                raise ValueError("locus_tag " + locus + " appeared twice")
            gene_map[locus] = feature

        # Index coding/RNA features by locus tag, keeping only those that have
        # a matching gene feature.
        coding_types = ("CDS", "ncRNA", "rRNA", "tmRNA", "tRNA")
        cds_map = {}
        for feature in features:
            if feature.type not in coding_types:
                continue
            if "locus_tag" not in feature.qualifiers:
                self.stderr.write("WARN: " + str(feature) + " without locus")
                continue
            locus = feature.qualifiers["locus_tag"][0]
            if locus in cds_map:
                raise ValueError("locus_tag " + locus + " appeared twice")
            if locus in gene_map:
                cds_map[locus] = feature

        gene_total = len(cds_map)
        ordered_features = sorted(cds_map.values(), key=lambda f: f.qualifiers["locus_tag"])

        for position, feature in enumerate(ordered_features, start=1):
            qualifiers = feature.qualifiers

            # Take the species' genetic code from the first feature that
            # declares a translation table, if it is not set yet.
            if not self.species.genetic_code and "transl_table" in qualifiers:
                self.species.genetic_code = qualifiers["transl_table"][0]
                self.species.save(self.detail)

            gene = cmodels.Gene.objects.for_species(self.species).for_wid(
                slugify(qualifiers["locus_tag"][0]), create=True
            )

            if hasattr(self, "notify_progress"):
                outstr = "Importing Gene %s (%d/%d)" % (gene.wid, position, gene_total)
                self.notify_progress(current=position, total=gene_total, message=outstr)

            gene.chromosome = chromosome

            if "gene" in qualifiers:
                gene.name = qualifiers["gene"][0]
                gene.symbol = qualifiers["gene"][0]

            gene.direction = "f" if feature.location.strand == 1 else "r"

            # __len__ is called directly because len() fails for numbers < 0;
            # joins output the wrong (negative) length, which is corrected by
            # adding the length of the whole sequence.
            span = feature.location.__len__()
            if span < 0:
                gene.length = span + len(self.record.seq)
            else:
                gene.length = span

            # NOTE(review): this used to read `start + 1 if "f" else start`.
            # The non-empty literal "f" is always truthy, so the coordinate has
            # always been start + 1 for both strands. That behaviour is kept;
            # confirm whether reverse-strand genes were meant to differ.
            gene.coordinate = feature.location.start + 1

            if "note" in qualifiers:
                gene.comments = "\n".join(qualifiers["note"])

            gene.species = self.species
            gene.save(self.detail)

            if "db_xref" in qualifiers:
                for raw_xref in qualifiers["db_xref"]:
                    link_cross_reference(gene, raw_xref)

            if "EC_number" in qualifiers:
                for ec in qualifiers["EC_number"]:
                    xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                        self.detail, source="EC", xid=ec
                    )
                    gene.cross_references.add(xref)

            if "gene_synonym" in qualifiers:
                for synonym in qualifiers["gene_synonym"]:
                    # Inconsistency: Multiple synonyms appear in one entry,
                    # why don't they split them like for all other items?
                    for syn in synonym.split(";"):
                        synonym_obj = cmodels.Synonym.objects.get_or_create_with_revision(
                            self.detail, name=syn.strip()
                        )
                        gene.synonyms.add(synonym_obj)

            if "protein_id" in qualifiers:
                protxref = qualifiers["protein_id"][0]
                wid = slugify(gene.wid + "_Monomer")

                protein = cmodels.ProteinMonomer.objects.for_species(self.species).for_wid(wid, create=True)

                if "product" in qualifiers:
                    protein.name = qualifiers["product"][0]

                xref = cmodels.CrossReference.objects.get_or_create_with_revision(
                    self.detail, source="RefSeq", xid=protxref
                )

                protein.gene = gene
                protein.species = self.species
                protein.save(self.detail)

                protein.cross_references.add(xref)

            # CDS features are stored under the type "mRNA".
            # NOTE(review): this rewrites the type on the record's own feature
            # object, as the original code did.
            if feature.type == "CDS":
                feature.type = "mRNA"

            gene_type = cmodels.Type.objects.for_wid(wid=slugify(feature.type), create=True)
            gene_type.name = feature.type

            gene_type.species = self.species
            gene_type.save(self.detail)

            gene.type.add(gene_type)

        if hasattr(self, "notify_progress"):
            outstr = "Assigning KEGG pathways"
            self.notify_progress(current=gene_total, total=gene_total, message=outstr)

        cmodels.Pathway.add_kegg_pathway(self.species, self.detail)