Exemplo n.º 1
 def __init__(self, email: str, query_str: str, outprefix: str) -> None:
     """
     Initialize class

     :param email: stored unchanged as ``self.email``
     :param query_str: stored unchanged as ``self.query_str``
     :param outprefix: passed to ``PhenoXPaths``; the result is kept as
         ``self.paths``
     """
     self.paths = PhenoXPaths(outprefix)
     self.query_str = query_str
     self.email = email
Exemplo n.º 2
 def __init__(self, cluster_dict: Dict, meta_dict: Dict,
              outprefix: str) -> None:
     """
     Initialize class

     :param cluster_dict: key=cluster_ids, value=list of GDS ids
     :param meta_dict: key=gds_ids, value=list of gds metrics (n_samples, dates, GPLs)
     :param outprefix: passed to ``PhenoXPaths``; the result is kept as
         ``self.paths``
     """
     self.paths = PhenoXPaths(outprefix)
     self.clusters = cluster_dict
     self.num_clusters = len(cluster_dict)
     self.meta_dict = meta_dict
     # Index list of gds categories. This is a fixed tuple of labels, not a
     # constructor argument.
     self.meta_list = ('Sample N', 'Submission Age', 'GPL')
Exemplo n.º 3
    def __init__(self, outprefix: str):
        """
        Initialize MeSH search tool

        Loads ``mesh.json`` from the data directory into ``self.mesh``. When
        that file does not exist, the first ``*.bin`` file found in the data
        directory is handed to ``self._parse_mesh_bin`` together with the JSON
        path (presumably that call writes the JSON file -- confirm against
        ``_parse_mesh_bin``).

        :param outprefix: passed to ``PhenoXPaths`` to resolve the data
            directory
        :raises FileNotFoundError: if ``mesh.json`` still does not exist when
            it is opened for loading
        """
        paths = PhenoXPaths(outprefix)
        mesh_json_path = os.path.join(paths.data_dir, 'mesh.json')
        self.mesh = dict()

        if not os.path.exists(mesh_json_path):
            mesh_bin_file = glob.glob(os.path.join(paths.data_dir, '*.bin'))
            if mesh_bin_file:
                self._parse_mesh_bin(mesh_bin_file[0], mesh_json_path)

        # Context manager closes the file handle even if parsing fails.
        with open(mesh_json_path, 'r') as f:
            self.mesh = json.load(f)
Exemplo n.º 4
    def __init__(self, outprefix, term, email, tool="phenotypeXpression", efetch_batch=5000, elink_batch=100):
        """
        Configure Entrez access and derive the output file paths for a term.

        Note that ``Entrez.email`` and ``Entrez.tool`` are assigned on the
        ``Entrez`` object itself, not on this instance.

        :param outprefix: passed to ``PhenoXPaths``; the result is kept as
            ``self.paths``
        :param term: search term; spaces are replaced by ``-`` before it is
            embedded in the output file names
        :param email: assigned to ``Entrez.email``
        :param tool: assigned to ``Entrez.tool``
        :param efetch_batch: stored as ``self.efetch_batch``
        :param elink_batch: stored as ``self.elink_batch``
        """
        Entrez.email = email
        Entrez.tool = tool
        self.efetch_batch = efetch_batch
        self.elink_batch = elink_batch
        self.db = 'geoprofiles'

        self.paths = PhenoXPaths(outprefix)
        term_name = term.replace(' ', '-')
        # All output files live in the output directory and share the
        # "<outprefix>_<term>_" file name prefix.
        self.hcluster_file = os.path.join(
            self.paths.output_dir, "{}_{}_hierarchical_clusters.pdf".format(self.paths.outprefix, term_name)
        )
        self.tree_file = os.path.join(
            self.paths.output_dir, "{}_{}_newick_tree.txt".format(self.paths.outprefix, term_name)
        )
        self.heatmap_file = os.path.join(
            self.paths.output_dir, "{}_{}_heatmap.pdf".format(self.paths.outprefix, term_name)
        )
        self.dist_graph_file = os.path.join(
            self.paths.output_dir, "{}_{}_dist_graph.pdf".format(self.paths.outprefix, term_name)
        )
Exemplo n.º 5
    def __init__(self, email: str, outprefix: str):
        """
        Set up the containers, the spaCy pipeline and the lookup tables.

        Reads ``id2kw_dict.pkl``, ``kw2id_dict.pkl`` and ``hgnc_synonyms.json``
        from the data directory, and appends an ``Entity`` pipe (label
        ``'DO/HPO'``, keywords taken from the keys of ``kw2id``) as the last
        component of the spaCy pipeline.

        :param email: assigned to ``Entrez.email`` (set on the ``Entrez``
            object itself, not on this instance)
        :param outprefix: passed to ``PhenoXPaths``; the result is kept as
            ``self.paths``
        """
        Entrez.email = email
        self.paths = PhenoXPaths(outprefix)
        self.pmid_abstracts = dict()
        # disease and human phenotype NER
        self.pmid_dner = {}
        # raw entity text
        self.pmid_ent_text = {}
        self.dner_cluster = {}
        self.total_dner = []

        self.nlp = spacy.load('en')
        # NOTE(review): pickle.load executes arbitrary code from the file, so
        # these .pkl files must come from a trusted data directory.
        # Context managers make sure the handles are closed after loading.
        with open(os.path.join(self.paths.data_dir, 'id2kw_dict.pkl'), 'rb') as f:
            self.id2kw = pickle.load(f)
        with open(os.path.join(self.paths.data_dir, 'kw2id_dict.pkl'), 'rb') as f:
            self.kw2id = pickle.load(f)
        entity = Entity(keywords_list=list(self.kw2id.keys()), label='DO/HPO')
        self.nlp.add_pipe(entity, last=True)

        # read synonyms from HGNC
        with open(os.path.join(self.paths.data_dir, 'hgnc_synonyms.json'),
                  'r') as f:
            self.hgnc = json.load(f)
Exemplo n.º 6
import os
import json

from phenox.paths import PhenoXPaths

# Build a per-symbol table of names and aliases from the HGNC JSON dump
# stored in the PhenoX data directory.
paths = PhenoXPaths()

hgnc_json_file = os.path.join(paths.data_dir, 'hgnc.json')
hgnc_syn_file = os.path.join(paths.data_dir, 'hgnc_synonyms.json')

with open(hgnc_json_file, 'r') as f:
    text = f.read()
    hgnc = json.loads(text)

num_entries = hgnc['response']['numFound']
data = hgnc['response']['docs']

# Maps each gene symbol to {'names': [...], 'aliases': [...]}.
hgnc_dict = dict()

for entry in data:
        symbol = entry['symbol']
        # Alias/previous fields are optional in an entry; default to empty
        # lists so the concatenations below always work.
        alias_symbol = entry.get('alias_symbol', [])
        prev_symbol = entry.get('prev_symbol', [])

        name = entry['name']
        prev_name = entry.get('prev_name', [])

        hgnc_dict[symbol] = {
            'names': [name] + prev_name,
            'aliases': alias_symbol + prev_symbol
        }
Exemplo n.º 7
 def __init__(self, outprefix: str):
     """
     Initialize class

     :param outprefix: forwarded to ``PhenoXPaths``; the resulting object is
         kept as ``self.paths``
     """
     phenox_paths = PhenoXPaths(outprefix)
     self.paths = phenox_paths