Exemplo n.º 1
0
def missing_species_names(names):
    """Report which of the supplied names are unknown to ``Species``.

    Each name is looked up with ``Species.get_species_name``; a name is
    treated as missing when that lookup yields the string ``"None"``.

    Returns a single column table (header ``MISSING SPECIES``) holding one
    row per missing name, in input order, or ``None`` if nothing is missing.
    """
    unknown = [
        [candidate]
        for candidate in names
        if Species.get_species_name(candidate) == "None"
    ]
    if not unknown:
        return None

    return make_table(header=["MISSING SPECIES"], data=unknown)
Exemplo n.º 2
0
def get_chrom_names(ref_species, compara):
    """Return the chromosome names recorded in compara for ``ref_species``.

    Joins the ``genome_db`` and ``dnafrag`` tables of ``compara.ComparaDb``
    on ``genome_db_id`` and selects ``dnafrag.name`` for rows where the
    coordinate system is ``"chromosome"``, the genome name equals the
    Ensembl db prefix of ``ref_species`` and ``is_reference`` is 1.
    """
    genome_db = compara.ComparaDb.get_table("genome_db")
    dnafrag = compara.ComparaDb.get_table("dnafrag")
    db_prefix = Species.get_ensembl_db_prefix(ref_species)

    # the three filters, combined below in the same order with AND
    is_chromosome = dnafrag.c.coord_system_name == "chromosome"
    is_ref_species = genome_db.c.name == db_prefix
    is_reference = dnafrag.c.is_reference == 1
    where = sql.and_(is_chromosome, is_ref_species, is_reference)

    same_genome = genome_db.c.genome_db_id == dnafrag.c.genome_db_id
    tables = genome_db.join(dnafrag, onclause=same_genome)

    query = sql.select([dnafrag.c.name], where).select_from(tables)
    return [row[0] for row in query.execute()]
Exemplo n.º 3
0
def renamed_seqs(aln):
    """Relabel every sequence in ``aln`` with its species common name.

    The latin name is extracted from each sequence label by
    ``get_latin_from_label`` and converted with ``Species.get_common_name``.
    Note the sequence objects in ``aln.seqs`` are renamed in place, even
    when ``None`` is returned.

    Returns a new DNA alignment built from the renamed sequences, or
    ``None`` if a common name is shared by more than one sequence.
    """
    tally = Counter()
    relabelled = []
    for record in aln.seqs:
        common_name = Species.get_common_name(get_latin_from_label(record.name))
        tally[common_name] += 1
        record.name = common_name
        relabelled.append((record.name, record))

    # a count above 1 means a species occurs more than once
    if max(tally.values()) > 1:
        return None

    return make_aligned_seqs(data=relabelled, moltype=DNA, array_align=False)
Exemplo n.º 4
0
def display_available_dbs(account, release=None):
    """Tabulate the Ensembl databases available at the nominated host.

    Parameters
    ----------
    account
        passed to ``get_db_name`` to identify the host
    release
        passed to ``get_db_name``; defaults to None

    Returns
    -------
    A table with columns Release, Db Name, Species and Common Name, sorted
    on the Release and Db Name columns. Both "core" and "compara" databases
    are listed. Compara databases show "-" in the species columns. A
    database without a species gets a common name of None.
    """
    db_list = get_db_name(account=account, db_type="core", release=release)
    db_list += get_db_name(account=account, db_type="compara", release=release)
    rows = []
    for db_name in db_list:
        if "compara" in db_name.name:
            # compara databases are not tied to a single species
            species_name = common_name = "-"
        else:
            species_name = db_name.species
            # assign per row so an unknown species can neither raise
            # UnboundLocalError nor inherit the previous row's common name
            common_name = (
                Species.get_common_name(species_name, level="ignore")
                if species_name
                else None
            )
        rows.append([db_name.release, db_name.name, species_name, common_name])

    table = make_table(
        header=["Release", "Db Name", "Species", "Common Name"], data=rows, space=2
    )
    table = table.sorted(["Release", "Db Name"])
    table.legend = (
        "Values of 'None' indicate cogent does not have a value for that database name."
    )
    return table
Exemplo n.º 5
0
def get_one2one_orthologs(
    compara, ref_genes, outpath, not_strict, force_overwrite, test
):
    """Write one-to-one orthologs of protein coding genes to outpath.

    Parameters
    ----------
    compara
        object providing ``species`` and ``get_related_genes``
    ref_genes
        iterable of gene stable ids to find orthologs for
    outpath
        directory receiving one ``<gene>.fa.gz`` per gene plus
        ``metadata.tsv``
    not_strict
        if False, a gene is skipped unless the species of its ortholog set
        match ``compara.species`` exactly
    force_overwrite
        if False, a gene whose output file already exists is not processed
        again (it is still included in the reported count)
    test
        if True, the gene id and fasta formatted sequences are printed
        instead of being written to a gzipped file

    Notes
    -----
    Genes without exactly one ``ortholog_one2one`` result are skipped.
    Each sequence is the canonical transcript CDS with the stop codon
    trimmed, named by the species common name.
    """
    species = Counter(compara.species)
    written = 0
    records = []
    with click.progressbar(ref_genes, label="Finding 1to1 orthologs") as ids:
        for gene in ids:
            outfile_name = os.path.join(outpath, "%s.fa.gz" % gene)
            if os.path.exists(outfile_name) and not force_overwrite:
                # pre-existing output counts towards the reported total
                written += 1
                continue

            related = list(
                compara.get_related_genes(
                    stableid=gene, relationship="ortholog_one2one"
                )
            )
            if len(related) != 1:
                continue

            syntenic = related[0]
            if syntenic is None:
                # nothing to extract; previously only guarded in strict
                # mode, so not_strict would fail on `.members` below
                continue

            if not not_strict and Counter(syntenic.get_species_set()) != species:
                # skipping, not all species had a 1to1 ortholog for this gene
                continue

            named_cds = []
            for m in syntenic.members:
                records.append([gene, m.stableid, m.location, m.description])
                name = Species.get_common_name(m.genome.species)
                cds = m.canonical_transcript.cds.trim_stop_codon(allow_partial=True)
                cds.name = name
                named_cds.append([name, cds])

            seqs = make_unaligned_seqs(data=named_cds)
            if test:
                print()
                print(gene)
                print(seqs.to_fasta())
            else:
                with gzip.open(outfile_name, "wt") as outfile:
                    outfile.write(seqs.to_fasta() + "\n")
                LOGGER.output_file(outfile_name)

            written += 1

    if test:
        msg = "Would have written %d files to %s" % (written, outpath)
    else:
        msg = "Wrote %d files to %s" % (written, outpath)

    click.echo(msg)

    # NOTE(review): metadata is written in test mode too, and only covers
    # genes processed in this run -- confirm both are intended
    if written > 0:
        # `data` is the keyword used by the other make_table calls
        metadata = make_table(
            header=["refid", "stableid", "location", "description"], data=records
        )
        metadata.write(os.path.join(outpath, "metadata.tsv"))