Пример #1
0
    def load_whog(self, whog_path):
        """Load the entries of a whog file as ``Ontology`` documents.

        The file content is split on the ``_______`` separator.  For every
        chunk, the first non-blank line is lowercased and read as
        space-separated ``<parent> <term> <name ...>``.  A document with
        ``ontology="cog"`` is saved for the term, and the term is appended
        to the ``children`` of every parent document it hangs from.

        A parent token longer than three characters is treated as a run of
        single-character codes wrapped in one leading and one trailing
        character: each inner character is looked up as ``[<char>]``.
        Shorter tokens are looked up as they are.  The new document's
        keywords are the ones extracted from the header line merged with
        the keywords of its parent document(s).

        :param whog_path: path of the whog file to read.
        """
        with open(whog_path, "r") as handle:
            chunks = handle.read().split("_______")

        for chunk in chunks:
            rows = [row.strip() for row in chunk.split("\n") if row.strip()]
            if not rows:
                continue

            header = rows[0].lower()
            # Tokenise the header once; the first two tokens are mandatory.
            tokens = header.lower().strip().split(" ")
            parent = tokens[0]
            term = tokens[1]
            name = " ".join(tokens[2:])

            ont_doc = Ontology(term=term, name=name, parent=parent,
                               ontology="cog")
            keywords = self.ki.extract_keywords(header)

            if len(parent) <= 3:
                # Single parent: the token itself is the parent term.
                parent_doc = Ontology.objects(term=parent).get()
                parent_doc.children.append(term)
                parent_doc.save()
                keywords = list(set(parent_doc.keywords + keywords))
            else:
                # Several parents: one document per inner character.
                for code in parent[1:-1]:
                    parent_doc = Ontology.objects(term='[' + code + ']').get()
                    keywords = list(set(parent_doc.keywords + keywords))
                    parent_doc.children.append(term)
                    parent_doc.save()

            ont_doc.keywords = keywords
            ont_doc.save()
Пример #2
0
    def load_dat(self, reactions_file, database, postfix):
        """Load the records of a ``.dat`` flat file as ``Ontology`` documents.

        Lines starting with ``#`` are ignored and records are separated by
        ``//`` lines.  Inside a record, every line containing ``" - "`` is
        read as ``FIELD - value`` and handled as follows:

        * ``UNIQUE-ID``   -> ``term`` (lowercased)
        * ``TYPES``       -> collected into ``types``
        * ``IN-PATHWAY``  -> appended to ``parents``
        * ``COMMON-NAME`` -> ``name``
        * ``COMMENT``     -> ``description`` (only while no name is set)
        * ``EC-NUMBER``   -> remembered as EC number (only while no name
          is set)

        When a record has no description, the ``|``-joined types are used.
        When it has no name, the EC number or, failing that, the term is
        used.  Records without a term are printed instead of being saved.

        :param reactions_file: path of the ``.dat`` file to read.
        :param database: value appended to each document's ``databases``.
        :param postfix: suffix appended to ``self.ontology_name`` to build
            the ``ontology`` value of each document.
        """
        with open(reactions_file) as reactions_handle:
            lines = [
                x for x in reactions_handle.readlines()
                if not x.startswith("#")
            ]
        # Every line still carries its own newline, so this join doubles
        # them; the resulting blank lines are skipped when parsing below.
        records = re.split("//\n", "\n".join(lines))

        for record in records:
            if not record.strip():
                continue

            ont_doc = Ontology(ontology=self.ontology_name + postfix)
            ont_doc.databases.append(database)
            reaction_types = []
            ec = None

            for str_record in record.split("\n"):
                if len(str_record.strip()) <= 3:
                    continue

                # Split on the FIRST " - " only, so that values which
                # themselves contain " - " are kept whole.
                field, separator, value = str_record.partition(" - ")
                if not separator:
                    continue
                field = field.strip()
                value = value.strip()

                # Decode only when the value is still a byte string: text
                # read by Python 3 is already ``str`` and has no
                # ``decode`` method.
                if isinstance(value, bytes):
                    try:
                        value = value.decode("utf-8")
                    except UnicodeDecodeError:
                        # Undecodable values are skipped, line by line.
                        continue

                if field == "UNIQUE-ID":
                    ont_doc.term = value.lower()
                elif field == "TYPES":
                    reaction_types.append(value)
                elif field == "IN-PATHWAY":
                    ont_doc.parents.append(value)
                elif field == "COMMON-NAME":
                    ont_doc.name = value
                # NOTE(review): the two branches below only fire while no
                # name has been set yet -- confirm this is intended.
                elif (field == "COMMENT") and (not ont_doc.name):
                    ont_doc.description = value
                elif (field == "EC-NUMBER") and (not ont_doc.name):
                    ec = value

            if not ont_doc.description:
                ont_doc.description = "|".join(reaction_types)
            if not ont_doc.name:
                ont_doc.name = ec if ec else ont_doc.term

            ont_doc.keywords = self.ki.extract_keywords(
                ont_doc.name) + [ont_doc.term]
            ont_doc.types = reaction_types
            if ec:
                ont_doc.keywords.append(ec)

            if not ont_doc.term:
                # A record without identifier cannot be stored; show it.
                print(record)
            else:
                ont_doc.save()
0
    def load_enzclass(self, enzclass_file_path):
        """Store the class entries of an enzclass file as ``Ontology`` docs.

        A ``root`` document named ``ec`` whose children are the six
        top-level ``ec:N.-.-.-`` terms is saved first.  Then every line
        that starts with a digit from 1 to 6 followed by a dot becomes one
        document: the text after the last ``.-`` is its name, and the rest
        of the line with spaces removed, prefixed by ``ec:``, is its term.

        :param enzclass_file_path: path of the enzclass file to read.
        """
        top_level_terms = [
            "ec:1.-.-.-",
            "ec:2.-.-.-",
            "ec:3.-.-.-",
            "ec:4.-.-.-",
            "ec:5.-.-.-",
            "ec:6.-.-.-",
        ]
        root = Ontology(ontology=self.ontology_name, term="root", name="ec",
                        children=top_level_terms)
        root.save()

        with open(enzclass_file_path) as enzclass_handle:
            for row in enzclass_handle:
                if not re.match(r'^[1-6][.]', row):
                    continue

                label = row.split(".-")[-1].strip()
                # Whatever is left once the label is removed is the number.
                number = row.replace(label, "").replace(" ", "").strip()

                entry = Ontology(ontology=self.ontology_name,
                                 term="ec:" + number,
                                 name=label)
                entry.keywords = (
                    self.ki.extract_keywords(entry.name) + [entry.term])
                entry.save()
Пример #4
0
    def _load_mongo(self):
        """Save ``self.graph`` as ``Ontology`` documents.

        A ``root`` document whose successors and children are
        ``self.root_terms`` is saved first, followed by one document per
        graph node.  The ``database`` of a node is ``molecular_function``
        when ``go:0003674`` is in the result of
        ``self.complete_subgraph([node])``, otherwise
        ``cellular_component`` when ``go:0005575`` is, and
        ``biological_process`` in every other case.

        :raises Exception: if the graph contains a node called ``root``.
        """
        root = Ontology(ontology=self.ontology_name,
                        term="root",
                        successors=self.root_terms,
                        children=self.root_terms)
        root.save()

        for node, data in self.graph.nodes_iter(data=True):
            if node == "root":
                raise Exception("...")

            direct_children = self.graph.successors(node)
            # Presumably the ancestors of the node -- only used for the
            # membership checks below.
            related_terms = self.complete_subgraph([node])

            # Same precedence as two overriding checks: molecular function
            # wins over cellular component, which wins over the default.
            if "go:0003674" in related_terms:
                database = "molecular_function"
            elif "go:0005575" in related_terms:
                database = "cellular_component"
            else:
                database = "biological_process"

            ont_doc = Ontology(
                ontology=self.ontology_name,
                term=node,
                name=data["name"],
                database=database,
                successors=self.all_successors(node, []),
                children=direct_children,
                description=self.go_dag.query_term(node.upper()).desc,
                # successors_relationships=self.successors_relationships(node),
                subclases=list({
                    child.lower()
                    for child in self.go_dag.query_term(
                        node.upper()).get_all_children()
                }),
            )
            ont_doc.keywords = self.ki.extract_keywords(
                [ont_doc.description, ont_doc.name, ont_doc.term])
            ont_doc.save()