def get_species(
    genera=None,
    subgenera=None,
    sections=None,
    species=None,
    strains=None,
    strain_ids=None,
    markers=None,
    types=False,
):
    """Query the database for strains matching the given filters.

    Despite the name, the query selects ``Strain`` rows. Every filter that is
    supplied narrows the result further; filters left empty are ignored.

    Args:
        genera: Genus names to keep.
        subgenera: Subgenus names to keep.
        sections: Section names to keep.
        species: Species epithets to keep.
        strains: Names to match; a strain is kept when any of its strain
            names is listed, or when its species' ``type`` value is listed.
        strain_ids: Strain ids to keep.
        markers: Marker type names; a strain must have a marker of every
            listed type.
        types: When true, keep only strains whose ``is_ex_type`` is true.

    Returns:
        List of matching ``Strain`` objects.

    Raises:
        ValueError: If a name in ``markers`` has no matching ``MarkerType``.
    """
    q = session.query(Strain).join(Species, Section, Subgenus, Genus)
    if genera:
        q = q.filter(Genus.name.in_(genera))
    if subgenera:
        q = q.filter(Subgenus.name.in_(subgenera))
    if sections:
        q = q.filter(Section.name.in_(sections))
    if species:
        q = q.filter(Species.epithet.in_(species))
    if strains:
        q = q.filter(
            Strain.strain_names.any(StrainName.name.in_(strains))
            | Species.type.in_(strains)
        )
    if strain_ids:
        q = q.filter(Strain.id.in_(strain_ids))
    if types:
        # SQLAlchemy column comparison, not a Python truth test.
        q = q.filter(Strain.is_ex_type == True)  # noqa: E712
    if markers:
        # De-duplicate (order-preserving) before counting: the IN query
        # cannot return a marker type more than once, so comparing against
        # the raw input length rejected valid input that merely repeated a
        # marker name.
        wanted = list(dict.fromkeys(markers))
        mq = session.query(MarkerType).filter(MarkerType.name.in_(wanted)).all()
        if len(mq) != len(wanted):
            raise ValueError("Marker mismatch; misspelled marker name?")
        q = q.filter(*[Strain.markers.any(marker_type=m) for m in mq])
    return q.all()
def index(page=None):
    """Render ``index.html`` with marker names and one page of strains.

    Args:
        page: Page selector, forwarded unchanged to ``get_page``.

    Returns:
        The result of ``render_template`` for ``index.html``.
    """
    strain_query = session.query(Strain).join(Species, Section, Subgenus, Genus)
    strain_query = strain_query.order_by(Strain.id, Genus.name, Species.epithet)

    marker_names = [marker_type.name for marker_type in session.query(MarkerType)]
    strain_page = get_page(strain_query, per_page=20, page=page)
    return render_template(
        "index.html",
        markers=marker_names,
        strains=strain_page,
    )
def get_strains(strain_ids=None):
    """Collect strain records as plain dictionaries.

    Args:
        strain_ids: Optional strain ids; when given (and non-empty) only
            those strains are returned.

    Returns:
        Dict with a single ``"strains"`` key holding one dict per strain.
        Each strain dict carries its marker accessions twice: nested under
        ``"markers"`` and flattened into the strain dict itself, keyed by
        each marker's ``marker`` attribute.
    """
    strain_query = (
        session.query(Strain)
        .join(Species, Section, Subgenus, Genus)
        .order_by(Strain.id, Genus.name, Species.epithet)
    )
    if strain_ids:
        strain_query = strain_query.filter(Strain.id.in_(strain_ids))

    rows = []
    for strain in strain_query:
        species = strain.species
        section = species.section
        accessions = {m.marker: m.accession for m in strain.markers}
        rows.append({
            "id": strain.id,
            "mycobank": species.mycobank,
            "subgenus": section.subgenus,
            "section": section.name,
            "genus": species.genus,
            "epithet": species.epithet,
            "strains": [sn.name for sn in strain.strain_names],
            "holotype": species.type,
            "is_ex_type": strain.is_ex_type,
            "markers": accessions,
            # Flattened copy; on a key clash the marker entry wins.
            **accessions,
        })
    return {"strains": rows}
def add_leaf_labels(tree, bold=None, types=None):
    """Attach HTML-formatted labels to the leaves of an ETE3 Tree object.

    One HTML label is built per leaf instead of several separate faces in
    separate columns, because the latter gives weird spacing when
    exporting/editing. The trade-off is that editing has to be done in .pdf
    format rather than .svg.

    Leaf names are looked up as strain ids. A label has the form
    ``<i>G. epithet</i> name``, where ``name`` is the strain's first strain
    name, or the species' ``type`` value when the epithet is in ``types``.

    Args:
        tree: Tree whose leaves receive the label faces.
        bold: Optional collection of epithets whose labels are wrapped in
            ``<b>`` tags.
        types: Optional collection of epithets labelled with the species'
            ``type`` value instead of the first strain name.
    """
    leaf_names = tree.get_leaf_names()
    matches = session.query(Strain).filter(Strain.id.in_(leaf_names))
    strain_by_leaf = {str(strain.id): strain for strain in matches}

    for leaf in tree.iter_leaves():
        strain = strain_by_leaf[leaf.name]
        genus = strain.species.genus
        epithet = strain.species.epithet
        show_type = bool(types) and epithet in types
        embolden = bool(bold) and epithet in bold

        if show_type:
            name = strain.species.type
        else:
            name = strain.strain_names[0].name

        label = f"<i>{genus[0]}. {epithet}</i> {name}"
        # Ex-type marker is only added when the strain name itself is shown.
        if strain.is_ex_type and not show_type:
            label += "<sup>T</sup>"
        if embolden:
            label = f"<b>{label}</b>"

        leaf.add_face(DynamicItemFace(label_maker, label=label), 0)
def add_section_annotations(tree: Tree) -> None:
    """Colour and label the taxonomic sections of a tree. Pretty hacky.

    For each section, the first common ancestor of its leaf nodes gets a
    background colour; a section with a single leaf styles just that leaf.
    A section label is also added to the section's first recorded leaf, so
    its exact position depends on which node search_nodes() finds first.

    Relies on accurate section annotation - FP strains were set to
    Talaromyces which breaks this, so strains with "FP" in their epithet
    are skipped.
    """
    leaf_names = tree.get_leaf_names()

    ids_by_section = defaultdict(list)
    for strain in session.query(Strain).filter(Strain.id.in_(leaf_names)):
        species = strain.species
        if "FP" not in species.epithet:
            ids_by_section[species.section.name].append(str(strain.id))

    palette = [
        "LightSteelBlue",
        "Moccasin",
        "DarkSeaGreen",
        "Khaki",
        "LightSalmon",
        "Turquoise",
        "Thistle",
    ]

    for position, (section, ids) in enumerate(ids_by_section.items()):
        # Colours wrap around once the palette is exhausted.
        style = NodeStyle()
        style["bgcolor"] = palette[position % len(palette)]

        # Style the MRCA, or the lone leaf for a single-member section.
        if len(ids) == 1:
            target = tree.search_nodes(name=ids[0])[0]
        else:
            target = tree.get_common_ancestor(*ids)
        target.set_style(style)

        # The section label goes on the first leaf recorded for the section.
        first_leaf = tree.search_nodes(name=ids[0])[0]
        first_leaf.add_face(
            faces.TextFace(section, fsize=20), column=1, position="aligned"
        )
def set_outgroup(tree, species):
    """Set an outgroup on an ETE3 Tree object.

    Leaf names are treated as strain ids. The leaves whose species epithet
    matches ``species`` form the outgroup: a single match is used directly,
    several matches are replaced by their common ancestor.

    Args:
        tree: Tree to re-root; modified in place via ``tree.set_outgroup``.
        species: A single epithet (``str``) or an iterable of epithets.

    Raises:
        ValueError: If no leaf of the tree matches the given species.
    """
    leaves = tree.get_leaf_names()
    q = session.query(Strain.id).filter(Strain.id.in_(leaves)).join(Species)
    if isinstance(species, str):
        q = q.filter(Species.epithet == species)
    else:
        q = q.filter(Species.epithet.in_(species))

    # Fetch the matching ids once; previously the same query was issued for
    # count(), then again for first() or for iteration.
    ids = [str(record[0]) for record in q]
    if not ids:
        raise ValueError("Found no match for given species")

    if len(ids) == 1:
        node = ids[0]
    else:
        node = tree.get_common_ancestor(*ids)
    tree.set_outgroup(node)
def get_marker_sequences():
    """Return marker sequences for the requested strains and marker types.

    Reads a JSON body from the current request with these keys:

    - ``strains``: strain ids to include (required, non-empty).
    - ``markers``: marker type names to include (required, non-empty).
    - ``format``: ``"fasta"`` (default) or ``"json"``.
    - ``aligned``: optional; when truthy, each marker's sequences are
      aligned before export (FASTA only).
    - ``concatenated``: optional; only consulted when ``aligned`` is truthy.
      When truthy, the alignments are combined into one file plus a
      partitions file.

    Returns:
        For ``"json"``: a dict mapping marker type name to a list of record
        dicts. For ``"fasta"``: the ``send_file`` response for a zip archive
        named ``fungiphy.zip``.

    Raises:
        ValueError: If the request is not a POST, if ``strains`` or
            ``markers`` is empty, or if ``format`` is not supported.
        KeyError: If ``strains`` or ``markers`` is missing from the body.
    """
    if request.method != "POST":
        raise ValueError("Expected POST request to /react/sequences")

    content = request.get_json()
    form = content.get("format", "fasta")

    if "strains" not in content or "markers" not in content:
        raise KeyError("Expected strains and markers")
    if not (content["strains"] and content["markers"]):
        raise ValueError("Recieved empty content")
    # Reject unknown formats up front; previously they fell through and
    # failed later on an undefined local.
    if form not in ("json", "fasta"):
        raise ValueError(f"Unsupported format: {form!r}")

    query = (
        session.query(Marker)
        .join(MarkerType)
        .join(Strain)
        .filter(Strain.id.in_(content["strains"]))
        .filter(MarkerType.name.in_(content["markers"]))
        .order_by(Marker.marker_type_id)
    )

    if form == "json":
        # Return the records directly. Previously this branch built them and
        # then fell into the FASTA/zip handling, which cannot process them.
        # NOTE(review): assumes the framework serialises a returned dict, as
        # the other dict-returning functions in this file appear to rely on
        # - confirm.
        json_records = defaultdict(list)
        for marker in query:
            json_records[marker.marker_type.name].append({
                "id": marker.id,
                "strain_id": marker.strain_id,
                "sequence": marker.sequence,
                "genus": marker.strain.species.section.subgenus.genus,
                "epithet": marker.strain.species.epithet,
                "strains": marker.strain.names,
            })
        return dict(json_records)

    # FASTA: group sequences by marker type name.
    records = defaultdict(list)
    for marker in query:
        header = f"{marker.strain.species.name} {marker.strain.names[0]}".replace(
            " ", "_"
        )
        records[marker.marker_type.name].append(
            phy.Sequence(header, marker.sequence)
        )

    # The flags are optional: a missing key means "off" rather than KeyError.
    if content.get("aligned"):
        alignments = {
            name: phy.align_sequences(
                sequences, name, tool="muscle", trim_msa=True
            )
            for name, sequences in records.items()
        }
        if content.get("concatenated"):
            msa = phy.MultiMSA(list(alignments.values()))
            files = [
                {"name": "markers.fna", "content": msa.fasta()},
                {"name": "partitions.text", "content": msa.raxml_partitions()},
            ]
        else:
            files = [
                {"name": f"{name}.fna", "content": alignment.fasta()}
                for name, alignment in alignments.items()
            ]
    else:
        files = [
            {
                "name": f"{name}.fna",
                "content": "\n".join(s.fasta() for s in sequences),
            }
            for name, sequences in records.items()
        ]

    archive = form_zip(files)
    # NOTE(review): newer Flask releases renamed `attachment_filename` to
    # `download_name`; kept as-is because the installed version is unknown.
    return send_file(
        archive,
        mimetype="application/zip",
        as_attachment=True,
        attachment_filename="fungiphy.zip",
    )
def get_marker_types():
    """Return the names of all marker types.

    Returns:
        Dict with a single ``"markers"`` key holding the list of
        ``MarkerType.name`` values.
    """
    names = [row[0] for row in session.query(MarkerType.name)]
    return {"markers": names}
def get_marker_accessions():
    """Return the set of ``accession`` values over all ``Marker`` rows."""
    all_markers = session.query(Marker)
    return {marker.accession for marker in all_markers}
def get_marker_types():
    """Return a dict mapping each marker type's name to its id."""
    # NOTE(review): another `get_marker_types` with a different return shape
    # is defined earlier in this source; if both live in the same module this
    # definition shadows it - confirm.
    marker_types = session.query(MarkerType)
    return {marker_type.name: marker_type.id for marker_type in marker_types}
def get_strain_names():
    """Return the set of ``name`` values over all ``StrainName`` rows."""
    all_names = session.query(StrainName)
    return {strain_name.name for strain_name in all_names}
def get_sections():
    """Return a dict mapping each section's name to its id."""
    all_sections = session.query(Section)
    return {row.name: row.id for row in all_sections}
def query_dict(model, key="name"):
    """Index every row of ``model`` by one of its attributes.

    Args:
        model: Class to query through ``session``.
        key: Attribute whose value becomes the dict key (default ``"name"``).

    Returns:
        Dict mapping each object's ``key`` attribute to the object. When
        several objects share a value, the last one iterated wins.
    """
    lookup = {}
    for obj in session.query(model):
        lookup[getattr(obj, key)] = obj
    return lookup