Пример #1
0
def get_species(
    genera=None,
    subgenera=None,
    sections=None,
    species=None,
    strains=None,
    strain_ids=None,
    markers=None,
    types=False,
):
    """Query the database for strains matching the given taxonomic filters.

    Despite the name, the query selects ``Strain`` rows (joined to their
    species, section, subgenus and genus). Every filter is optional; the
    ones that are supplied are combined with AND.

    Args:
        genera: Genus names to keep.
        subgenera: Subgenus names to keep.
        sections: Section names to keep.
        species: Species epithets to keep.
        strains: Strain names to keep; a strain matches when any of its
            strain names, or the ``type`` of its species, is in this
            collection.
        strain_ids: Strain IDs to keep.
        markers: Marker type names; a strain must have a marker of every
            listed type.
        types: When truthy, keep only strains flagged as ex-type.

    Returns:
        A list with the results of the filtered ``Strain`` query.

    Raises:
        ValueError: If a name in ``markers`` does not match a stored
            marker type.
    """
    q = session.query(Strain).join(Species, Section, Subgenus, Genus)
    if genera:
        q = q.filter(Genus.name.in_(genera))
    if subgenera:
        q = q.filter(Subgenus.name.in_(subgenera))
    if sections:
        q = q.filter(Section.name.in_(sections))
    if species:
        q = q.filter(Species.epithet.in_(species))
    if strains:
        q = q.filter(
            Strain.strain_names.any(StrainName.name.in_(strains))
            | Species.type.in_(strains)
        )
    if strain_ids:
        q = q.filter(Strain.id.in_(strain_ids))
    if types:
        # `==` is deliberate: it builds a SQL expression, not a Python bool.
        q = q.filter(Strain.is_ex_type == True)  # noqa: E712
    if markers:
        # De-duplicate before the sanity check, otherwise a repeated name
        # (e.g. ["ITS", "ITS"]) is misreported as a misspelled marker.
        requested = set(markers)
        mq = session.query(MarkerType).filter(MarkerType.name.in_(requested)).all()
        if len(mq) != len(requested):
            raise ValueError("Marker mismatch; misspelled marker name?")
        # One EXISTS clause per marker type: the strain must have them all.
        q = q.filter(*[Strain.markers.any(marker_type=m) for m in mq])
    return q.all()
Пример #2
0
def index(page=None):
    """Render ``index.html`` with marker type names and one page of strains.

    Strains are ordered by strain ID, genus name and species epithet, and
    paginated 20 per page through ``get_page``.
    """
    strain_query = session.query(Strain).join(Species, Section, Subgenus, Genus)
    strain_query = strain_query.order_by(Strain.id, Genus.name, Species.epithet)

    marker_names = [marker_type.name for marker_type in session.query(MarkerType)]
    strain_page = get_page(strain_query, per_page=20, page=page)

    return render_template(
        "index.html",
        markers=marker_names,
        strains=strain_page,
    )
Пример #3
0
def get_strains(strain_ids=None):
    """Return strain records as a dictionary under the ``"strains"`` key.

    Args:
        strain_ids: Optional collection of strain IDs; when given (and
            non-empty) only those strains are returned.

    Returns:
        ``{"strains": [...]}`` with one dictionary per strain, ordered by
        strain ID, genus name and species epithet. Each strain's marker
        accessions appear twice: nested under ``"markers"`` and flattened
        into the strain dictionary itself, keyed by marker name.
    """
    query = session.query(Strain).join(Species, Section, Subgenus, Genus)
    query = query.order_by(Strain.id, Genus.name, Species.epithet)

    if strain_ids:
        query = query.filter(Strain.id.in_(strain_ids))

    rows = []
    for strain in query:
        species = strain.species
        section = species.section
        accessions = {m.marker: m.accession for m in strain.markers}

        row = {
            "id": strain.id,
            "mycobank": species.mycobank,
            "subgenus": section.subgenus,
            "section": section.name,
            "genus": species.genus,
            "epithet": species.epithet,
            "strains": [strain_name.name for strain_name in strain.strain_names],
            "holotype": species.type,
            "is_ex_type": strain.is_ex_type,
            "markers": accessions,
        }
        # Flatten the accessions into the row last, so that a marker name
        # clashing with one of the keys above takes precedence.
        row.update(accessions)
        rows.append(row)

    return {"strains": rows}
Пример #4
0
def add_leaf_labels(tree, bold=None, types=None):
    """Attach an HTML-formatted label face to every leaf of an ETE3 tree.

    Leaf names are looked up as strain IDs. Each label is the abbreviated
    genus and the epithet in italics, followed by a strain name. A single
    HTML label is used because separate faces in separate columns give
    odd spacing when exporting/editing; the drawback is that editing has
    to be done in .pdf rather than .svg.

    Args:
        tree: Tree whose leaves are named after strain IDs.
        bold: Optional collection of epithets whose labels are bolded.
        types: Optional collection of epithets labelled with the species
            type instead of the first strain name.
    """
    leaf_names = tree.get_leaf_names()
    matching = session.query(Strain).filter(Strain.id.in_(leaf_names))
    strain_by_leaf = {str(record.id): record for record in matching}

    for leaf in tree.iter_leaves():
        strain = strain_by_leaf[leaf.name]
        species = strain.species

        genus = species.genus
        epithet = species.epithet

        show_type = bool(types and epithet in types)
        make_bold = bool(bold and epithet in bold)

        if show_type:
            name = species.type
        else:
            name = strain.strain_names[0].name

        label = f"<i>{genus[0]}. {epithet}</i>  {name}"

        # The superscript T is skipped when the type name is already shown.
        if strain.is_ex_type and not show_type:
            label += "<sup>T</sup>"
        if make_bold:
            label = f"<b>{label}</b>"

        leaf.add_face(DynamicItemFace(label_maker, label=label), 0)
Пример #5
0
def add_section_annotations(tree: Tree) -> None:
    """Colour and label the taxonomic sections of a tree.

    Leaves are grouped by the section of their strain's species. For each
    section the first common ancestor of its leaves gets a background
    colour (or the leaf itself when the section has a single member), and
    a section label is attached to the first leaf recorded for it, so the
    label position depends on the order in which strains were found.

    Fairly hacky, and reliant on accurate section annotation: strains
    whose epithet contains "FP" are skipped because they were assigned to
    Talaromyces, which breaks the grouping.
    """
    palette = (
        "LightSteelBlue",
        "Moccasin",
        "DarkSeaGreen",
        "Khaki",
        "LightSalmon",
        "Turquoise",
        "Thistle",
    )

    leaf_names = tree.get_leaf_names()
    members = defaultdict(list)
    for strain in session.query(Strain).filter(Strain.id.in_(leaf_names)):
        species = strain.species
        if "FP" in species.epithet:
            continue
        members[species.section.name].append(str(strain.id))

    for position, (section, ids) in enumerate(members.items()):
        background = NodeStyle()
        # The modulo wraps the palette around when sections outnumber colours.
        background["bgcolor"] = palette[position % len(palette)]

        # Style the MRCA of the section, or the lone leaf itself.
        if len(ids) > 1:
            clade = tree.get_common_ancestor(*ids)
        else:
            clade = tree.search_nodes(name=ids[0])[0]
        clade.set_style(background)

        # The section label hangs off the first leaf recorded for it.
        label_host = tree.search_nodes(name=ids[0])[0]
        label_host.add_face(
            faces.TextFace(section, fsize=20),
            column=1,
            position="aligned",
        )
Пример #6
0
def set_outgroup(tree, species):
    """Set an outgroup on an ETE3 Tree object.

    Leaf names are treated as strain IDs. The strains among them whose
    species epithet matches ``species`` form the outgroup: a single match
    is used directly, several matches are replaced by their common
    ancestor.

    Args:
        tree: Tree whose leaves are named after strain IDs.
        species: One epithet as a string, or a collection of epithets.

    Raises:
        ValueError: If no leaf belongs to the given species.
    """
    leaves = tree.get_leaf_names()

    q = session.query(Strain.id).filter(Strain.id.in_(leaves)).join(Species)

    if isinstance(species, str):
        q = q.filter(Species.epithet == species)
    else:
        q = q.filter(Species.epithet.in_(species))

    # Fetch the matching IDs once, rather than issuing a COUNT query
    # followed by a second query for the rows themselves.
    ids = [str(record[0]) for record in q]

    if not ids:
        raise ValueError("Found no match for given species")

    if len(ids) == 1:
        outgroup = ids[0]
    else:
        outgroup = tree.get_common_ancestor(*ids)

    tree.set_outgroup(outgroup)
Пример #7
0
def get_marker_sequences():
    """Serve marker sequences for the requested strains and marker types.

    Expects a POST request whose JSON body is an object with:

    * ``strains``: non-empty collection of strain IDs (required)
    * ``markers``: non-empty collection of marker type names (required)
    * ``format``: ``"fasta"`` (default) or ``"json"``
    * ``aligned``: FASTA only; when truthy each marker is aligned with
      MUSCLE and trimmed (defaults to falsy)
    * ``concatenated``: aligned FASTA only; when truthy the alignments are
      concatenated into one file plus a RAxML partition file (defaults to
      falsy)

    Returns:
        For ``"json"``, a dictionary mapping each marker type name to a
        list of sequence records (this relies on the web framework
        serialising dictionaries returned from views). For ``"fasta"``, a
        zip archive sent as the attachment ``fungiphy.zip``.

    Raises:
        ValueError: If the request is not a POST, if ``strains`` or
            ``markers`` is empty, or if ``format`` is not supported.
        KeyError: If the body is not a JSON object containing both
            ``strains`` and ``markers``.
    """
    if request.method != "POST":
        raise ValueError("Expected POST request to /react/sequences")

    content = request.get_json()

    # A missing or non-object body is reported like missing keys instead
    # of failing with a TypeError on the membership test.
    if not isinstance(content, dict) or not (
        "strains" in content and "markers" in content
    ):
        raise KeyError("Expected strains and markers")

    if not (content["strains"] and content["markers"]):
        raise ValueError("Recieved empty content")

    form = content.get("format", "fasta")
    if form not in ("json", "fasta"):
        # Previously an unknown format crashed later on an unbound name.
        raise ValueError(f"Unsupported format: {form!r}")

    query = (
        session.query(Marker)
        .join(MarkerType)
        .join(Strain)
        .filter(Strain.id.in_(content["strains"]))
        .filter(MarkerType.name.in_(content["markers"]))
        .order_by(Marker.marker_type_id)
    )

    if form == "json":
        json_records = defaultdict(list)
        for marker in query:
            strain = marker.strain
            json_records[marker.marker_type.name].append({
                "id": marker.id,
                "strain_id": marker.strain_id,
                "sequence": marker.sequence,
                "genus": strain.species.section.subgenus.genus,
                "epithet": strain.species.epithet,
                "strains": strain.names,
            })
        # There is no archive in this branch; return the records directly
        # rather than falling through to send_file().
        return dict(json_records)

    # form == "fasta": group the sequences by marker type name.
    records = defaultdict(list)
    for marker in query:
        strain = marker.strain
        header = f"{strain.species.name} {strain.names[0]}".replace(" ", "_")
        records[marker.marker_type.name].append(
            phy.Sequence(header, marker.sequence)
        )

    if content.get("aligned"):
        alignments = {
            name: phy.align_sequences(
                sequences, name, tool="muscle", trim_msa=True
            )
            for name, sequences in records.items()
        }
        if content.get("concatenated"):
            msa = phy.MultiMSA(list(alignments.values()))
            files = [
                {"name": "markers.fna", "content": msa.fasta()},
                {"name": "partitions.text", "content": msa.raxml_partitions()},
            ]
        else:
            files = [
                {"name": f"{name}.fna", "content": alignment.fasta()}
                for name, alignment in alignments.items()
            ]
    else:
        files = [
            {
                "name": f"{name}.fna",
                "content": "\n".join(s.fasta() for s in sequences),
            }
            for name, sequences in records.items()
        ]

    archive = form_zip(files)

    # NOTE(review): Flask 2.0 renamed `attachment_filename` to
    # `download_name` and later removed the old name - confirm which Flask
    # version this project pins before upgrading.
    return send_file(
        archive,
        mimetype="application/zip",
        as_attachment=True,
        attachment_filename="fungiphy.zip",
    )
Пример #8
0
def get_marker_types():
    """Return the names of all marker types under the ``"markers"`` key."""
    # Only the name column is queried, so every row is a 1-tuple.
    names = [name for (name,) in session.query(MarkerType.name)]
    return {"markers": names}
Пример #9
0
def get_marker_accessions():
    """Return the set of accessions of all markers in the database."""
    all_markers = session.query(Marker)
    return {record.accession for record in all_markers}
Пример #10
0
def get_marker_types():
    """Return a dictionary mapping each marker type name to its ID."""
    ids_by_name = {}
    for marker_type in session.query(MarkerType):
        ids_by_name[marker_type.name] = marker_type.id
    return ids_by_name
Пример #11
0
def get_strain_names():
    """Return the set of all strain names stored in the database."""
    all_names = session.query(StrainName)
    return {record.name for record in all_names}
Пример #12
0
def get_sections():
    """Return a dictionary mapping each section name to its ID."""
    return dict(
        (record.name, record.id)
        for record in session.query(Section)
    )
Пример #13
0
def query_dict(model, key="name"):
    """Index every row of ``model`` by one of its attributes.

    Args:
        model: Mapped class to query.
        key: Name of the attribute used as dictionary key.

    Returns:
        A dictionary mapping each row's ``key`` attribute to the row
        itself; rows sharing a key value overwrite one another.
    """
    lookup = {}
    for row in session.query(model):
        lookup[getattr(row, key)] = row
    return lookup