def rdf_level(gene_id, condition_label, base_condition_label, ext_base_condition_label, fold_change, pvalue, time_point):
    """
    Print the RDF statement describing one differential expression level.

    The statement links a gene to a condition, relative to a base condition,
    and carries the fold change and p-value. The text is written to `out`;
    both `out` and `exp_acc` are names taken from the enclosing scope.

    :param gene_id: gene identifier; lower-cased when building the statement URI.
    :param condition_label: label of the tested condition.
    :param base_condition_label: label of the reference condition.
    :param ext_base_condition_label: optional extra base condition; when falsy,
      no "_in_..." URI suffix and no extra agri:baseCondition triple are emitted.
    :param fold_change: value emitted as agri:log2FoldChange.
    :param pvalue: value emitted as agri:pvalue.
    :param time_point: time point in hours; -1 means "no time point".
    """
    experiment_uri = make_ae_exp_uri(exp_acc)
    gene_uri = make_gene_uri(gene_id)
    # TODO: it would be more correct to not change the case, but we want Knet compatibility
    gene_key = gene_id.lower()

    cond_id = make_id(condition_label, skip_non_word_chars=False)
    cond_uri = make_condition_uri(condition_label)
    base_cond_id = make_id(base_condition_label, skip_non_word_chars=False)
    base_cond_uri = make_condition_uri(base_condition_label)

    ext_base_cond_id = None
    ext_base_cond_uri = None
    if ext_base_condition_label:
        ext_base_cond_id = make_id(ext_base_condition_label, skip_non_word_chars=False)
        ext_base_cond_uri = make_condition_uri(ext_base_condition_label)

    with_time_point = time_point != -1

    # The statement URI grows optional suffixes for the extra base condition
    # and for the time point.
    ext_suffix = f"_in_{ext_base_cond_id}" if ext_base_cond_uri else ""
    time_suffix = f"_{time_point}h" if with_time_point else ""
    stmt_uri = (
        f"bkr:gxaexp_{exp_acc}_{gene_key}_{cond_id}_vs_{base_cond_id}"
        f"{ext_suffix}{time_suffix}"
    )

    fragments = [dedent(f"""
        {stmt_uri} a rdfs:Statement;
            rdf:subject {gene_uri};
            rdf:predicate bioschema:expressedIn;
            rdf:object {cond_uri};
            agri:baseCondition {base_cond_uri};
            agri:log2FoldChange {fold_change};
            agri:pvalue {pvalue};
            agri:evidence {experiment_uri} .
        {gene_uri} bioschema:expressedIn {cond_uri}.
    """)]

    if ext_base_cond_uri:
        fragments.append(f"\n{stmt_uri} agri:baseCondition {ext_base_cond_uri}.\n")

    if with_time_point:
        # The time point is modelled as a condition labelled "<n> hours".
        time_point_uri = make_condition_uri(f"{time_point} hours")
        fragments.append(f"\n{stmt_uri} agri:timePoint {time_point_uri}.")

    print("".join(fragments), file=out)
def rdf_specie(exp_uri: str, specie_label: str):
    """
    Build the RDF text that attaches an organism to an experiment.

    The organism is emitted as a schema:PropertyValue with propertyID
    "organism". When `specie2terms` (a mapping taken from the enclosing scope)
    has an entry for the label, its terms are added as dc:type values.

    :param exp_uri: URI of the experiment the organism belongs to.
    :param specie_label: organism name; used both for the URI and as the value.
    :return: the RDF text, or "" when `specie_label` is empty.
    """
    if not specie_label:
        return ""

    organism_uri = "bkr:specie_" + make_id(specie_label, skip_non_word_chars=True)

    fragments = [dedent(f"""
        {exp_uri} schema:additionalProperty {organism_uri}.
        {organism_uri} a schema:PropertyValue;
            schema:propertyID "organism";
            schema:value "{specie_label}";
    """)]

    ontology_terms = specie2terms.get(specie_label)
    if ontology_terms:
        term_refs = ", ".join(f"<{term}>" for term in ontology_terms)
        fragments.append("\tdc:type " + term_refs + ";\n")

    # Close the property list opened by the PropertyValue declaration.
    fragments.append(".\n")
    return "".join(fragments)
def rdf_level(gene_id, condition_label, tpm, ordinal_tpm):
    """
    Print the RDF statement describing one baseline expression level.

    The statement links a gene to a condition and carries the TPM count and
    its ordinal form. The text is written to `out`; both `out` and `exp_acc`
    are names taken from the enclosing scope.

    :param gene_id: gene identifier; lower-cased when building the statement URI.
    :param condition_label: label of the condition the gene is expressed in.
    :param tpm: value emitted as agri:tpmCount.
    :param ordinal_tpm: value emitted, quoted, as agri:ordinalTpm.
    """
    experiment_uri = make_ae_exp_uri(exp_acc)
    gene_uri = make_gene_uri(gene_id)
    # TODO: it would be more correct to not change the case, but we want Knet compatibility
    gene_key = gene_id.lower()

    cond_id = make_id(condition_label, skip_non_word_chars=False)
    cond_uri = make_condition_uri(condition_label)
    stmt_uri = f"bkr:gxaexp_{exp_acc}_{gene_key}_{cond_id}"

    statement = dedent(f"""
        {stmt_uri} a rdfs:Statement;
            agri:tpmCount {tpm};
            agri:ordinalTpm "{ordinal_tpm}";
            rdf:subject {gene_uri};
            rdf:predicate bioschema:expressedIn;
            rdf:object {cond_uri};
            agri:evidence {experiment_uri} .
        {gene_uri} bioschema:expressedIn {cond_uri}.
    """)
    print(statement, file=out)
def rdf_ae_experiment(exp_js: dict, out=stdout) -> str:
    """
    Render an experiment descriptor as RDF text.

    The output declares the experiment as a bioschema:Study, followed by its
    organisms and its publications.

    :param exp_js: experiment descriptor. "accession" is required; "name",
      "description", "releasedate", "gxaAnalysisType", "organism" and
      "bibliography" are read when present.
    :param out: stream to print the RDF to. Pass a falsy value (e.g. None) to
      get the RDF back as a string instead.
    :return: the RDF text when `out` is falsy; None when it was printed.
    """
    specie2terms = {
        "arabidopsis thaliana": ["http://purl.bioontology.org/ontology/NCBITAXON/3701"],
        "triticum aestivum": ["http://purl.bioontology.org/ontology/NCBITAXON/4565"],
    }

    def rdf_specie(exp_uri: str, specie_label: str):
        """Return the RDF linking `exp_uri` to an organism, or "" for an empty label."""
        if not specie_label:
            return ""

        specie_uri = "bkr:specie_" + make_id(specie_label, skip_non_word_chars=True)
        rdf = dedent(f"""
            {exp_uri} schema:additionalProperty {specie_uri}.
            {specie_uri} a schema:PropertyValue;
                schema:propertyID "organism";
                schema:value "{specie_label}";
        """)

        # NOTE(review): the lookup is case-sensitive and the keys above are all
        # lower case - confirm that labels arrive lower-cased.
        specie_terms = specie2terms.get(specie_label)
        if specie_terms:
            rdf_terms = ", ".join("<" + s + ">" for s in specie_terms)
            rdf += "\tdc:type " + rdf_terms + ";\n"
        rdf += ".\n"
        return rdf

    def rdf_publication(exp_uri, exp_js):
        """Return the RDF for every usable entry in exp_js["bibliography"]."""
        # A missing key and a null value both mean "no publications".
        publications = exp_js.get("bibliography") or []

        entries = []
        for pub_js in publications:
            # Without this very minimum, it's hardly a meaningful entry
            if not ("title" in pub_js or "accession" in pub_js or "doi" in pub_js):
                continue

            if "accession" in pub_js:
                pub_uri = "bkr:pmid_" + str(pub_js["accession"])
            elif "doi" in pub_js:
                # NOTE(review): the DOI is used verbatim as the subject; a bare
                # DOI is probably not a valid IRI or prefixed name - confirm.
                pub_uri = pub_js["doi"]
            else:
                pub_uri = "bkr:pub_" + hash_generator(pub_js.values())

            # Dedent this template on its own. Dedenting the accumulated text
            # does nothing once it holds unindented lines, which would leave
            # every entry after the first with its source-code indentation.
            entry = dedent(f"""
                {exp_uri} schema:subjectOf {pub_uri}.

                {pub_uri} a agri:ScholarlyPublication;
            """)
            entry += rdf_text(pub_js, "title", "\tdc:title")
            entry += rdf_text(pub_js, "authors", "\tagri:authorsList")
            entry += rdf_str(pub_js, "accession", "\tagri:pmedId")
            entry += rdf_str(pub_js, "doi", "\tagri:doiId")
            entry += rdf_str(pub_js, "year", "\tschema:datePublished")
            entry += ".\n"
            entries.append(entry)

        return "".join(entries)

    exp_acc = exp_js["accession"]
    exp_uri = make_ae_exp_uri(exp_acc)

    rdf = dedent(f"""
        {exp_uri} a bioschema:Study;
            schema:identifier "{exp_acc}";
    """)
    rdf += rdf_text(exp_js, "name", "\tdc:title")

    # TODO: not clear why they're arrays
    # .get() so that a descriptor without a description is not a hard error.
    descriptions = exp_js.get("description")
    if descriptions:
        rdf += rdf_text(descriptions[0], "text", "\tschema:description")
    rdf += rdf_str(exp_js, "releasedate", "\tschema:datePublished")

    # gxaAnalysisType is added by gxa.gxa_get_experiment_descriptors() and they can be 'Differential', 'Baseline'
    # Detailed specifications for such types are in gxa-defaults.ttl, here we create a link to the corresponding
    # URIs used there
    #
    rdf += rdf_pval(
        exp_js,
        "gxaAnalysisType",
        "\tschema:additionalProperty",
        lambda gxa_type: "bkr:gxa_analysis_type_" + make_id(gxa_type, skip_non_word_chars=True),
    )
    rdf += ".\n"

    for specie in exp_js.get("organism", []):
        rdf += rdf_specie(exp_uri, specie)

    rdf += rdf_publication(exp_uri, exp_js)

    if out:
        print(rdf, file=out)
        return None
    return rdf
def make_condition_uri(condition_label: str) -> str:
    """
    Build the URI of an experimental condition from its label.

    :param condition_label: the condition's label.
    :return: "bkr:cond_" followed by the ID that make_id() derives from the label.
    """
    condition_id = make_id(condition_label, skip_non_word_chars=False)
    return "bkr:cond_" + condition_id