Exemplo n.º 1
0
 def get_annotations(self):
     """Return the term-annotation table as a DataFrame, cached on ``self``.

     Resolution order:

     1. If ``self.annotations`` is already a non-empty DataFrame, return it.
     2. If ``self.from_backup`` is truthy, load it with
        ``open_pickle(ANNOS_BACKUP_PATH)``.
     3. Otherwise run the SQL below against ``self.db_url`` and hand the
        result to ``create_pickle`` with ``ANNOS_BACKUP_PATH``.

     The query yields the columns ``tid``, ``annotation_type_tid``,
     ``term_ilx``, ``annotation_type_ilx``, ``annotation_type_label`` and
     ``value``.
     """
     # A DataFrame cannot be truth-tested (``not df`` raises ValueError), so
     # the cache check uses ``.empty`` exactly like the sibling getters do.
     if not self.annotations.empty:
         return self.annotations
     if self.from_backup:
         self.annotations = open_pickle(ANNOS_BACKUP_PATH)
         return self.annotations
     engine = create_engine(self.db_url)
     # ``terms`` is joined twice: t1 resolves the annotated term, t2 the
     # annotation type. NOTE(review): ``SELECT * ... GROUP BY terms.ilx``
     # presumably keeps one row per ilx and relies on the database allowing
     # non-aggregated columns with GROUP BY -- confirm against the server.
     query = """
         SELECT
             ta.tid, ta.annotation_tid as annotation_type_tid,
             t1.ilx as term_ilx, t2.ilx as annotation_type_ilx,
             t2.label as annotation_type_label,
             ta.value
         FROM term_annotations AS ta
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) AS t1 ON ta.tid=t1.id
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) AS t2 ON ta.annotation_tid=t2.id
     """
     self.annotations = pd.read_sql(query, engine)
     create_pickle(self.annotations, ANNOS_BACKUP_PATH)
     return self.annotations
Exemplo n.º 2
0
 def get_superclasses(self):
     """Return the term/superclass table as a DataFrame, cached on ``self``.

     A non-empty ``self.superclasses`` is returned untouched. Otherwise it is
     filled either from ``open_pickle(SUPER_BACKUP_PATH)`` (when
     ``self.from_backup`` is truthy) or from the SQL query below, whose
     result is also passed to ``create_pickle`` with ``SUPER_BACKUP_PATH``.

     Query columns: ``tid``, ``superclass_tid``, ``term_label``, ``term_ilx``,
     ``superclass_label``, ``superclass_ilx``.
     """
     # t1 resolves the term row, t2 the superclass row.
     query = """
         SELECT
             ts.tid, ts.superclass_tid,
             t1.label as term_label, t1.ilx as term_ilx,
             t2.label as superclass_label, t2.ilx as superclass_ilx
         FROM term_superclasses AS ts
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) as t1
         ON t1.id = ts.tid
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) AS t2
         ON t2.id = ts.superclass_tid
     """
     if self.superclasses.empty:
         if self.from_backup:
             self.superclasses = open_pickle(SUPER_BACKUP_PATH)
         else:
             self.superclasses = pd.read_sql(query, create_engine(self.db_url))
             create_pickle(self.superclasses, SUPER_BACKUP_PATH)
     return self.superclasses
Exemplo n.º 3
0
 def get_relationships(self):
     """Return the term-relationship table as a DataFrame, cached on ``self``.

     A non-empty ``self.relationships`` is returned untouched. Otherwise it
     is filled either from ``open_pickle(RELAS_BACKUP_PATH)`` (when
     ``self.from_backup`` is truthy) or from the SQL query below, whose
     result is also passed to ``create_pickle`` with ``RELAS_BACKUP_PATH``.

     Query columns: ``term1_tid``/``term1_ilx``/``term1_type``,
     ``term2_tid``/``term2_ilx``/``term2_type`` and
     ``relationship_tid``/``relationship_ilx``/``relationship_label``.
     """
     # ``terms`` is joined three times: once per side of the relationship
     # (t1, t2) and once for the relationship entity itself (t3).
     query = """
        SELECT
            t1.id as term1_tid, t1.ilx AS term1_ilx, t1.type as term1_type,
            t2.id as term2_tid, t2.ilx AS term2_ilx, t2.type as term2_type,
            t3.id as relationship_tid, t3.ilx AS relationship_ilx, t3.label as relationship_label
        FROM term_relationships AS tr
        JOIN (
            SELECT *
            FROM terms
            GROUP BY terms.ilx
        ) t1 ON t1.id = tr.term1_id
        JOIN (
            SELECT *
            FROM terms
            GROUP BY terms.ilx
        ) AS t2 ON t2.id = tr.term2_id
        JOIN (
            SELECT *
            FROM terms
            GROUP BY terms.ilx
        ) AS t3 ON t3.id = tr.relationship_tid
     """
     if self.relationships.empty:
         if self.from_backup:
             self.relationships = open_pickle(RELAS_BACKUP_PATH)
         else:
             self.relationships = pd.read_sql(query, create_engine(self.db_url))
             create_pickle(self.relationships, RELAS_BACKUP_PATH)
     return self.relationships
Exemplo n.º 4
0
 def get_terms(self):
     """Return the terms table as a DataFrame, cached on ``self``.

     A non-empty ``self.terms`` is returned untouched. Otherwise it is filled
     either from ``open_pickle(TERMS_BACKUP_PATH)`` (when ``self.from_backup``
     is truthy) or from the SQL query below, whose result is also passed to
     ``create_pickle`` with ``TERMS_BACKUP_PATH``.

     The query uses GROUP BY on ``ilx`` as a shortcut for keeping only the
     first row of every ilx group.
     """
     query = """
         SELECT t.id as tid, t.ilx, t.label, t.definition, t.type, t.comment, t.version, t.uid, t.time
         FROM terms t
         GROUP BY t.ilx
     """
     if self.terms.empty:
         if self.from_backup:
             self.terms = open_pickle(TERMS_BACKUP_PATH)
         else:
             self.terms = pd.read_sql(query, create_engine(self.db_url))
             create_pickle(self.terms, TERMS_BACKUP_PATH)
     return self.terms
Exemplo n.º 5
0
 def get_synonyms(self):
     """Return the term-synonym table as a DataFrame, cached on ``self``.

     A non-empty ``self.synonyms`` is returned untouched. Otherwise it is
     filled either from ``open_pickle(SYNOS_BACKUP_PATH)`` (when
     ``self.from_backup`` is truthy) or from the SQL query below, whose
     result is also passed to ``create_pickle`` with ``SYNOS_BACKUP_PATH``.

     Query columns: ``tid``, ``ilx``, ``literal``, ``type``.
     """
     # The join condition lives in the WHERE clause rather than an ON clause.
     query = """
         SELECT ts.tid as tid, t.ilx, ts.literal, ts.type
         FROM term_synonyms AS ts
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) AS t
         WHERE ts.tid=t.id
     """
     if self.synonyms.empty:
         if self.from_backup:
             self.synonyms = open_pickle(SYNOS_BACKUP_PATH)
         else:
             self.synonyms = pd.read_sql(query, create_engine(self.db_url))
             create_pickle(self.synonyms, SYNOS_BACKUP_PATH)
     return self.synonyms
Exemplo n.º 6
0
 def get_existing_ids(self):
     """Return the existing-id table as a DataFrame, cached on ``self``.

     A non-empty ``self.existing_ids`` is returned untouched. Otherwise it is
     filled either from ``open_pickle(EXIDS_BACKUP_PATH)`` (when
     ``self.from_backup`` is truthy) or from the SQL query below, whose
     result is also passed to ``create_pickle`` with ``EXIDS_BACKUP_PATH``.

     Query columns: ``tid``, ``curie``, ``iri``, ``preferred``, ``ilx``,
     ``label``, ``definition``.
     """
     query = """
         SELECT tei.tid, tei.curie, tei.iri, tei.preferred, t.ilx, t.label, t.definition
         FROM (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) as t
         JOIN term_existing_ids AS tei
         ON t.id = tei.tid
     """
     if self.existing_ids.empty:
         if self.from_backup:
             self.existing_ids = open_pickle(EXIDS_BACKUP_PATH)
         else:
             self.existing_ids = pd.read_sql(query, create_engine(self.db_url))
             create_pickle(self.existing_ids, EXIDS_BACKUP_PATH)
     return self.existing_ids
Exemplo n.º 7
0
 def get_terms_complete(self) -> pd.DataFrame:
     """Return one row per term enriched with its related records.

     Resolution order:

     1. If ``self.terms_complete`` is already non-empty, return it.
     2. If ``self.from_backup`` is truthy, load it with
        ``open_pickle(TERMS_COMPLETE_BACKUP_PATH)``.
     3. Otherwise take every row of ``self.fetch_terms()`` and add the keys
        ``synonyms``, ``existing_ids``, ``annotations`` and ``superclass``,
        looked up by the row's ``ilx`` value. The result is stored on
        ``self.terms_complete`` and passed to ``create_pickle`` with
        ``TERMS_COMPLETE_BACKUP_PATH``.

     Raises:
         KeyError: if a term's ``ilx`` is missing from the existing-ids
             mapping; the other three lookups use ``.get`` and yield
             ``None`` instead.
     """
     if not self.terms_complete.empty:
         return self.terms_complete
     if self.from_backup:
         self.terms_complete = open_pickle(TERMS_COMPLETE_BACKUP_PATH)
         return self.terms_complete
     ilx2synonyms = self.get_ilx2synonyms()
     ilx2existing_ids = self.get_ilx2existing_ids()
     ilx2annotations = self.get_ilx2annotations()
     ilx2superclass = self.get_ilx2superclass()
     # Fetch once so the header and the iterated rows come from the same frame.
     terms = self.fetch_terms()
     # itertuples() yields the index first, followed by the column values.
     header = ['Index'] + list(terms.columns)
     ilx_complete = []
     for values in terms.itertuples():
         row = dict(zip(header, values))
         ilx = row['ilx']
         row['synonyms'] = ilx2synonyms.get(ilx)
         row['existing_ids'] = ilx2existing_ids[ilx]  # if breaks we have worse problems
         row['annotations'] = ilx2annotations.get(ilx)
         row['superclass'] = ilx2superclass.get(ilx)
         ilx_complete.append(row)
     # Keep the result on the instance so the cache check above can hit on
     # later calls, matching the other getters.
     self.terms_complete = pd.DataFrame(ilx_complete)
     create_pickle(self.terms_complete, TERMS_COMPLETE_BACKUP_PATH)
     return self.terms_complete
Exemplo n.º 8
0
from collections import defaultdict
from ilxutils.interlex_sql import IlxSql
from ilxutils.mydifflib import ratio
from ilxutils.tools import open_pickle, create_pickle
import os
import pandas as pd
from pathlib import Path
from sys import exit

# Script setup: build an IlxSql handle from the SCICRUNCH_DB_URL_PRODUCTION
# environment variable (os.environ.get returns None when it is unset).
sql = IlxSql(db_url=os.environ.get('SCICRUNCH_DB_URL_PRODUCTION'))
data = {}
# Pickle path under the current user's home directory.
output = Path.home() / 'Dropbox/ilx-df.pickle'
# Hard-coded option mapping; only '--local' is defined here.
doc = {'--local': True}

if doc['--local']:
    # Load every table from pickles under ~/Dropbox/interlex_backups
    # instead of going through `sql`.
    terms = open_pickle(Path.home() /
                        'Dropbox/interlex_backups/ilx_db_terms_backup.pickle')
    annos = open_pickle(Path.home() /
                        'Dropbox/interlex_backups/ilx_db_annos_backup.pickle')
    synonyms = open_pickle(
        Path.home() / 'Dropbox/interlex_backups/ilx_db_synonyms_backup.pickle')
    superclasses = open_pickle(
        Path.home() /
        'Dropbox/interlex_backups/ilx_db_superclasses_backup.pickle')
    relationships = open_pickle(
        Path.home() /
        'Dropbox/interlex_backups/ilx_db_relationships_backup.pickle')
    existing_ids = open_pickle(
        Path.home() / 'Dropbox/interlex_backups/ilx_db_ex_backup.pickle')
# NOTE(review): `doc` as defined above has no '--src' key, so this lookup
# would raise KeyError if the '--local' branch were ever skipped.
elif doc['--src']:
    # Pull the tables through the IlxSql getters instead of local pickles.
    terms = sql.get_terms()
    annos = sql.get_annotations()
Exemplo n.º 9
0
    'oboInOwl': 'http://www.geneontology.org/formats/oboInOwl#',
    'owl': 'http://www.w3.org/2002/07/owl#',
    'ro': 'http://www.obofoundry.org/ro/ro.owl#',
    'skos': 'http://www.w3.org/2004/02/skos/core#',
    'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    'rdfs': 'http://www.w3.org/2000/01/rdf-schema#',
    'prov': 'http://www.w3.org/ns/prov#',
})

# Prefix joined with each row's ilx value (using '/') to form its URI.
ilx_uri_base = 'http://uri.interlex.org/base'
# Maps every URI generated below to True.
in_sanity_check = {}

DEFINITION = Namespace('http://purl.obolibrary.org/obo/IAO_0000115')
ILXTR = Namespace('http://uri.interlex.org/tgbugs/uris/readable/')

# NOTE(review): `p` is defined outside this view -- presumably pathlib.Path;
# confirm.
terms = open_pickle(p.home() /
                    'Dropbox/interlex_backups/ilx_db_terms_backup.pickle')
# Emit triples for every term row through `rw` (defined outside this view).
for row in terms.itertuples():
    ilx_uri = '/'.join([ilx_uri_base, row.ilx])
    in_sanity_check[ilx_uri] = True

    # Only class-like entity types get an rdf:type triple; annotation and
    # relationship rows are deliberately skipped here.
    if row.type in ['term', 'cde', 'fde', 'pde']:
        rw.add_triple(ilx_uri, RDF.type, OWL.Class)
    elif row.type == 'annotation':
        pass  # g.add_triple(ilx_uri, RDF.type, OWL.AnnotationProperty)
    elif row.type == 'relationship':
        pass  # g.add_triple(ilx_uri, RDF.type, OWL.ObjectProperty)
    else:
        # Unrecognised type: tag the entity with OWL.Lost and report it.
        # NOTE(review): OWL.Lost looks like an ad-hoc marker rather than a
        # standard OWL term -- confirm this is intended.
        rw.add_triple(ilx_uri, RDF.type, OWL.Lost)
        print('We have an no type entity!', row.ilx)

    # Every row gets a label triple regardless of its type.
    rw.add_triple(ilx_uri, RDFS.label, row.label)