Пример #1
0
    def rdf_level(gene_id, condition_label, base_condition_label,
                  ext_base_condition_label, fold_change, pvalue, time_point):
        # TODO: it would be more correct to not change the case, but we want Knet compatibility
        exp_uri = make_ae_exp_uri(exp_acc)
        gene_id_nrm = gene_id.lower()
        gene_uri = make_gene_uri(gene_id)

        cond_id = make_id(condition_label, skip_non_word_chars=False)
        cond_uri = make_condition_uri(condition_label)

        base_cond_id = make_id(base_condition_label, skip_non_word_chars=False)
        base_cond_uri = make_condition_uri(base_condition_label)

        ext_base_cond_id, ext_base_cond_uri = None, None
        if ext_base_condition_label:
            ext_base_cond_id = make_id(ext_base_condition_label,
                                       skip_non_word_chars=False)
            ext_base_cond_uri = make_condition_uri(ext_base_condition_label)

        exp_stmt_uri = f"bkr:gxaexp_{exp_acc}_{gene_id_nrm}_{cond_id}_vs_{base_cond_id}"
        if ext_base_cond_uri:
            exp_stmt_uri += f"_in_{ext_base_cond_id}"

        if time_point != -1: exp_stmt_uri += f"_{time_point}h"

        rdf = f"""
			{exp_stmt_uri} a rdfs:Statement;
				rdf:subject {gene_uri};
				rdf:predicate bioschema:expressedIn;
				rdf:object {cond_uri};
				agri:baseCondition {base_cond_uri};
				agri:log2FoldChange {fold_change};
				agri:pvalue {pvalue};
				agri:evidence {exp_uri}
			.
				
			{gene_uri} bioschema:expressedIn {cond_uri}.
		"""

        rdf = dedent(rdf)

        if ext_base_cond_uri:
            rdf += f"\n{exp_stmt_uri} agri:baseCondition {ext_base_cond_uri}.\n"

        if time_point != -1:
            time_point_str = str(time_point) + " hours"
            time_point_uri = make_condition_uri(time_point_str)
            rdf += f"\n{exp_stmt_uri} agri:timePoint {time_point_uri}."

        print(rdf, file=out)
Пример #2
0
    def rdf_specie(exp_uri: str, specie_label: str):
        if not specie_label: return ""
        specie_uri = "bkr:specie_" + make_id(specie_label,
                                             skip_non_word_chars=True)
        rdf = f"""
			{exp_uri} schema:additionalProperty {specie_uri}.
			{specie_uri} a schema:PropertyValue;
				schema:propertyID "organism";
				schema:value "{specie_label}";
		"""
        rdf = dedent(rdf)

        specie_terms = specie2terms.get(specie_label)
        if specie_terms:
            rdf_terms = ", ".join(["<" + s + ">" for s in specie_terms])
            rdf += "\tdc:type " + rdf_terms + ";\n"

        rdf += ".\n"
        return rdf
Пример #3
0
    def rdf_level(gene_id, condition_label, tpm, ordinal_tpm):
        # TODO: it would be more correct to not change the case, but we want Knet compatibility
        exp_uri = make_ae_exp_uri(exp_acc)
        gene_id_nrm = gene_id.lower()
        gene_uri = make_gene_uri(gene_id)
        cond_id = make_id(condition_label, skip_non_word_chars=False)
        cond_uri = make_condition_uri(condition_label)

        rdf = f"""
			bkr:gxaexp_{exp_acc}_{gene_id_nrm}_{cond_id} a rdfs:Statement;
				agri:tpmCount {tpm};
				agri:ordinalTpm "{ordinal_tpm}";
				rdf:subject {gene_uri};
				rdf:predicate bioschema:expressedIn;
				rdf:object {cond_uri};
				agri:evidence {exp_uri}
			.
				
			{gene_uri} bioschema:expressedIn {cond_uri}.
		"""
        rdf = dedent(rdf)

        print(rdf, file=out)
Пример #4
0
def rdf_ae_experiment(exp_js: dict, out=stdout) -> "str | None":
    """
    Render a GXA experiment descriptor as Turtle.

    The output contains the experiment itself (a `bioschema:Study` with its
    identifier, title, description, release date and analysis type), one
    property value per organism and one entry per usable publication.

    :param exp_js: the experiment descriptor. "accession" is required, while
      "name", "description", "releasedate", "gxaAnalysisType", "organism" and
      "bibliography" are used when present.
    :param out: where the RDF is printed. When it is falsy (eg, None), nothing
      is printed and the RDF is returned instead.
    :return: the RDF string when `out` is falsy, None otherwise.
    :raises KeyError: if `exp_js` has no "accession".
    """
    # Local import: only the DOI-based publication URIs need it
    from urllib.parse import quote

    # Keys are lower-case, lookups normalise the label accordingly
    specie2terms = {
        "arabidopsis thaliana":
        ["http://purl.bioontology.org/ontology/NCBITAXON/3701"],
        "triticum aestivum":
        ["http://purl.bioontology.org/ontology/NCBITAXON/4565"],
    }

    def rdf_specie(exp_uri: str, specie_label: str) -> str:
        """
        Return the Turtle attaching an organism to the experiment, or "" when
        the label is empty.
        """
        if not specie_label:
            return ""

        specie_uri = "bkr:specie_" + make_id(specie_label,
                                             skip_non_word_chars=True)

        # Backslashes and double quotes would otherwise break the string
        # literal the label is written into.
        label_literal = specie_label.replace("\\", "\\\\").replace('"', '\\"')

        rdf = dedent(f"""
			{exp_uri} schema:additionalProperty {specie_uri}.
			{specie_uri} a schema:PropertyValue;
				schema:propertyID "organism";
				schema:value "{label_literal}";
		""")

        # Case-insensitive, so that "Arabidopsis thaliana" finds its terms too
        specie_terms = specie2terms.get(specie_label.strip().lower())
        if specie_terms:
            rdf_terms = ", ".join(f"<{term}>" for term in specie_terms)
            rdf += "\tdc:type " + rdf_terms + ";\n"

        # The trailing ';' before the final '.' is accepted by Turtle
        rdf += ".\n"
        return rdf

    def make_doi_uri(doi) -> str:
        """
        Turn a DOI into an absolute IRI in Turtle syntax.

        A bare DOI such as 10.1000/xyz is neither a valid IRI reference nor a
        prefixed name, so it cannot be written as-is where a resource is
        expected.
        """
        doi = str(doi).strip()
        if doi.lower().startswith("doi:"):
            doi = doi[len("doi:"):].strip()
        if not doi.lower().startswith(("http://", "https://")):
            doi = "https://doi.org/" + doi
        # Percent-encode whatever isn't allowed between <...>
        return "<" + quote(doi, safe="/:") + ">"

    def rdf_publication(exp_uri, exp_js) -> str:
        """
        Return the Turtle for the entries in exp_js["bibliography"], or "" when
        there is none.
        """
        entries = []
        for pub_js in exp_js.get("bibliography") or []:
            # Without this very minimum, it's hardly a meaningful entry
            if not any(key in pub_js for key in ("title", "accession", "doi")):
                continue

            if "accession" in pub_js:
                pub_uri = "bkr:pmid_" + str(pub_js["accession"])
            elif "doi" in pub_js:
                pub_uri = make_doi_uri(pub_js["doi"])
            else:
                pub_uri = "bkr:pub_" + hash_generator(pub_js.values())

            # Only the template is dedented, one entry at a time. Dedenting the
            # accumulated text left every entry after the first one indented
            # and could alter whitespace inside already rendered literals.
            entry = dedent(f"""
				{exp_uri} schema:subjectOf {pub_uri}.
				{pub_uri} a agri:ScholarlyPublication;
			""")
            entry += rdf_text(pub_js, "title", "\tdc:title")
            entry += rdf_text(pub_js, "authors", "\tagri:authorsList")
            entry += rdf_str(pub_js, "accession", "\tagri:pmedId")
            entry += rdf_str(pub_js, "doi", "\tagri:doiId")
            entry += rdf_str(pub_js, "year", "\tschema:datePublished")
            entry += ".\n"

            entries.append(entry)

        return "".join(entries)

    exp_acc = exp_js["accession"]
    exp_uri = make_ae_exp_uri(exp_acc)

    rdf = dedent(f"""
		{exp_uri} a bioschema:Study;
			schema:identifier "{exp_acc}";
	""")
    rdf += rdf_text(exp_js, "name", "\tdc:title")

    # TODO: not clear why they're arrays
    # The description is optional like the other fields, only the first
    # element is used.
    description = exp_js.get("description")
    if description:
        rdf += rdf_text(description[0], "text", "\tschema:description")

    rdf += rdf_str(exp_js, "releasedate", "\tschema:datePublished")

    # gxaAnalysisType is added by gxa.gxa_get_experiment_descriptors() and they can be 'Differential', 'Baseline'
    # Detailed specifications for such types are in gxa-defaults.ttl, here we create a link to the corresponding
    # URIs used there
    #
    rdf += rdf_pval(
        exp_js, "gxaAnalysisType", "\tschema:additionalProperty",
        lambda gxa_type: "bkr:gxa_analysis_type_" + make_id(
            gxa_type, skip_non_word_chars=True))

    rdf += ".\n"

    for specie in exp_js.get("organism", []):
        rdf += rdf_specie(exp_uri, specie)
    rdf += rdf_publication(exp_uri, exp_js)

    if out:
        print(rdf, file=out)
    else:
        return rdf
Пример #5
0
def make_condition_uri(condition_label: str) -> str:
    """
    Return the URI of an experimental condition, ie, the `bkr:cond_` prefix
    followed by the ID that make_id() derives from the label (with
    skip_non_word_chars disabled).
    """
    condition_id = make_id(condition_label, skip_non_word_chars=False)
    return "bkr:cond_" + condition_id