示例#1
0
 def get_superclasses(self):
     ''' Return the term/superclass table, reusing the in-memory copy when present.

     Resolution order:
         1. `self.superclasses`, if that DataFrame is already non-empty.
         2. Whatever open_pickle returns for SUPER_BACKUP_PATH, when
            `self.from_backup` is truthy.
         3. A fresh query against `self.db_url`; the result is stored on the
            instance and passed to create_pickle with SUPER_BACKUP_PATH.
     '''
     cached = self.superclasses
     if not cached.empty:
         return cached
     if self.from_backup:
         restored = open_pickle(SUPER_BACKUP_PATH)
         self.superclasses = restored
         return restored
     # Each superclass link is joined back to `terms` twice so that the label
     # and ilx of both the term and its superclass come back in a single row.
     query = """
         SELECT
             ts.tid, ts.superclass_tid,
             t1.label as term_label, t1.ilx as term_ilx,
             t2.label as superclass_label, t2.ilx as superclass_ilx
         FROM term_superclasses AS ts
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) as t1
         ON t1.id = ts.tid
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) AS t2
         ON t2.id = ts.superclass_tid
     """
     fetched = pd.read_sql(query, create_engine(self.db_url))
     self.superclasses = fetched
     create_pickle(fetched, SUPER_BACKUP_PATH)
     return fetched
示例#2
0
 def get_annotations(self):
     ''' Return the term annotation table, reusing the in-memory copy when present.

     Resolution order:
         1. `self.annotations`, if that DataFrame is already non-empty.
         2. Whatever open_pickle returns for ANNOS_BACKUP_PATH, when
            `self.from_backup` is truthy.
         3. A fresh query against `self.db_url`; the result is stored on the
            instance and passed to create_pickle with ANNOS_BACKUP_PATH.
     '''
     # `.empty` is required here: taking the truth value of a DataFrame
     # (`not self.annotations`) raises ValueError, and the cached frame should
     # be returned when it HAS rows, matching the other get_* methods.
     if not self.annotations.empty:
         return self.annotations
     if self.from_backup:
         self.annotations = open_pickle(ANNOS_BACKUP_PATH)
         return self.annotations
     engine = create_engine(self.db_url)
     # t1 resolves the annotated term, t2 resolves the annotation type; both
     # are looked up in `terms` by id.
     data = """
         SELECT
             ta.tid, ta.annotation_tid as annotation_type_tid,
             t1.ilx as term_ilx, t2.ilx as annotation_type_ilx,
             t2.label as annotation_type_label,
             ta.value
         FROM term_annotations AS ta
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) AS t1 ON ta.tid=t1.id
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) AS t2 ON ta.annotation_tid=t2.id
     """
     self.annotations = pd.read_sql(data, engine)
     create_pickle(self.annotations, ANNOS_BACKUP_PATH)
     return self.annotations
示例#3
0
 def get_relationships(self):
     ''' Return the term relationship table, reusing the in-memory copy when present.

     Resolution order:
         1. `self.relationships`, if that DataFrame is already non-empty.
         2. Whatever open_pickle returns for RELAS_BACKUP_PATH, when
            `self.from_backup` is truthy.
         3. A fresh query against `self.db_url`; the result is stored on the
            instance and passed to create_pickle with RELAS_BACKUP_PATH.
     '''
     cached = self.relationships
     if not cached.empty:
         return cached
     if self.from_backup:
         restored = open_pickle(RELAS_BACKUP_PATH)
         self.relationships = restored
         return restored
     # Three lookups into `terms`: t1 and t2 are the two related terms and
     # t3 is the term that names the relationship itself.
     query = """
        SELECT
            t1.id as term1_tid, t1.ilx AS term1_ilx, t1.type as term1_type,
            t2.id as term2_tid, t2.ilx AS term2_ilx, t2.type as term2_type,
            t3.id as relationship_tid, t3.ilx AS relationship_ilx, t3.label as relationship_label
        FROM term_relationships AS tr
        JOIN (
            SELECT *
            FROM terms
            GROUP BY terms.ilx
        ) t1 ON t1.id = tr.term1_id
        JOIN (
            SELECT *
            FROM terms
            GROUP BY terms.ilx
        ) AS t2 ON t2.id = tr.term2_id
        JOIN (
            SELECT *
            FROM terms
            GROUP BY terms.ilx
        ) AS t3 ON t3.id = tr.relationship_tid
     """
     fetched = pd.read_sql(query, create_engine(self.db_url))
     self.relationships = fetched
     create_pickle(fetched, RELAS_BACKUP_PATH)
     return fetched
示例#4
0
 def __create_pickles(self):
     ''' Pickle the DataFrame form of every file in `self.files` under `self.output`.

     For each file, `Graph2Pandas(f).df` is passed to create_pickle with a
     destination of `<self.output>/<stem of f>`. Progress is printed as
     "<index> <total> <file>" with a 1-based index.
     '''
     out_dir = p(self.output)
     # Create the directory in-process rather than through a shell string:
     # no quoting problems with spaces or metacharacters in the path, and an
     # already-existing directory is not an error.
     out_dir.mkdir(parents=True, exist_ok=True)
     total = len(self.files)
     for i, f in enumerate(self.files, 1):
         print(i, total, f)
         create_pickle(Graph2Pandas(f).df, out_dir / p(f).stem)
示例#5
0
 def get_terms(self):
     ''' Return the terms table, reusing the in-memory copy when present.

     The query groups by `t.ilx` as a shortcut for keeping only one row per
     ilx (the first of each group).

     Resolution order:
         1. `self.terms`, if that DataFrame is already non-empty.
         2. Whatever open_pickle returns for TERMS_BACKUP_PATH, when
            `self.from_backup` is truthy.
         3. A fresh query against `self.db_url`; the result is stored on the
            instance and passed to create_pickle with TERMS_BACKUP_PATH.
     '''
     cached = self.terms
     if not cached.empty:
         return cached
     if self.from_backup:
         restored = open_pickle(TERMS_BACKUP_PATH)
         self.terms = restored
         return restored
     query = """
         SELECT t.id as tid, t.ilx, t.label, t.definition, t.type, t.comment, t.version, t.uid, t.time
         FROM terms t
         GROUP BY t.ilx
     """
     fetched = pd.read_sql(query, create_engine(self.db_url))
     self.terms = fetched
     create_pickle(fetched, TERMS_BACKUP_PATH)
     return fetched
示例#6
0
 def get_synonyms(self):
     ''' Return the term synonym table, reusing the in-memory copy when present.

     Resolution order:
         1. `self.synonyms`, if that DataFrame is already non-empty.
         2. Whatever open_pickle returns for SYNOS_BACKUP_PATH, when
            `self.from_backup` is truthy.
         3. A fresh query against `self.db_url`; the result is stored on the
            instance and passed to create_pickle with SYNOS_BACKUP_PATH.
     '''
     cached = self.synonyms
     if not cached.empty:
         return cached
     if self.from_backup:
         restored = open_pickle(SYNOS_BACKUP_PATH)
         self.synonyms = restored
         return restored
     # The join has no ON clause; rows are matched by the WHERE condition
     # ts.tid = t.id instead.
     query = """
         SELECT ts.tid as tid, t.ilx, ts.literal, ts.type
         FROM term_synonyms AS ts
         JOIN (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) AS t
         WHERE ts.tid=t.id
     """
     fetched = pd.read_sql(query, create_engine(self.db_url))
     self.synonyms = fetched
     create_pickle(fetched, SYNOS_BACKUP_PATH)
     return fetched
示例#7
0
 def get_existing_ids(self):
     ''' Return the existing-ids table, reusing the in-memory copy when present.

     Resolution order:
         1. `self.existing_ids`, if that DataFrame is already non-empty.
         2. Whatever open_pickle returns for EXIDS_BACKUP_PATH, when
            `self.from_backup` is truthy.
         3. A fresh query against `self.db_url`; the result is stored on the
            instance and passed to create_pickle with EXIDS_BACKUP_PATH.
     '''
     cached = self.existing_ids
     if not cached.empty:
         return cached
     if self.from_backup:
         restored = open_pickle(EXIDS_BACKUP_PATH)
         self.existing_ids = restored
         return restored
     # Each existing id row is paired with the ilx, label and definition of
     # the term it belongs to.
     query = """
         SELECT tei.tid, tei.curie, tei.iri, tei.preferred, t.ilx, t.label, t.definition
         FROM (
             SELECT *
             FROM terms
             GROUP BY terms.ilx
         ) as t
         JOIN term_existing_ids AS tei
         ON t.id = tei.tid
     """
     fetched = pd.read_sql(query, create_engine(self.db_url))
     self.existing_ids = fetched
     create_pickle(fetched, EXIDS_BACKUP_PATH)
     return fetched
示例#8
0
 def get_terms_complete(self) -> pd.DataFrame:
     ''' Gets complete entity data like term/view.

     Returns one row per term from `self.fetch_terms()`, extended with the
     columns `synonyms`, `existing_ids`, `annotations` and `superclass`,
     each looked up by the term's ilx.

     Resolution order:
         1. `self.terms_complete`, if that DataFrame is already non-empty.
         2. Whatever open_pickle returns for TERMS_COMPLETE_BACKUP_PATH, when
            `self.from_backup` is truthy.
         3. A freshly built frame, which is stored on the instance and passed
            to create_pickle with TERMS_COMPLETE_BACKUP_PATH.

     Raises:
         KeyError: if a term's ilx is missing from the existing-ids lookup
             and that lookup is a plain mapping.
     '''
     if not self.terms_complete.empty:
         return self.terms_complete
     if self.from_backup:
         self.terms_complete = open_pickle(TERMS_COMPLETE_BACKUP_PATH)
         return self.terms_complete
     ilx2synonyms = self.get_ilx2synonyms()
     ilx2existing_ids = self.get_ilx2existing_ids()
     ilx2annotations = self.get_ilx2annotations()
     ilx2superclass = self.get_ilx2superclass()
     # Fetch once and reuse for both the header and the row iteration.
     terms = self.fetch_terms()
     # itertuples() yields the index first, hence the leading 'Index' name.
     header = ['Index'] + list(terms.columns)
     ilx_complete = []
     for values in terms.itertuples():
         row = dict(zip(header, values))
         ilx = row['ilx']
         row['synonyms'] = ilx2synonyms.get(ilx)
         # Indexed directly on purpose: if this breaks we have worse problems.
         row['existing_ids'] = ilx2existing_ids[ilx]
         row['annotations'] = ilx2annotations.get(ilx)
         row['superclass'] = ilx2superclass.get(ilx)
         ilx_complete.append(row)
     # Store the result on the instance so the cache check at the top can
     # short-circuit subsequent calls.
     self.terms_complete = pd.DataFrame(ilx_complete)
     create_pickle(self.terms_complete, TERMS_COMPLETE_BACKUP_PATH)
     return self.terms_complete
示例#9
0
    if not data.get(row.ilx):
        data[row.ilx] = defaultdict(set)
    data[row.ilx]['label'].add(row.label)
    data[row.ilx]['ilx'].add(row.ilx)
    data[row.ilx]['definition'].add(row.definition)

# Fold each auxiliary table into `data`, which the code above keys by ilx and
# fills with defaultdict(set) records. Every field is a set, so repeated
# values collapse.
# NOTE(review): `data[...]` is indexed directly throughout; presumably every
# ilx referenced below was already registered from the terms table — confirm.

# Annotations are stored as (annotation_label, value) pairs.
for row in annos.itertuples():
    data[row.term_ilx]['annotation'].add((row.annotation_label, row.value))

# Synonyms with a falsy literal are skipped.
for row in synonyms.itertuples():
    if row.literal:
        data[row.ilx]['synonym'].add(row.literal)

# Existing ids are stored as (curie, iri) pairs.
for row in existing_ids.itertuples():
    data[row.ilx]['existing_ids'].add((row.curie, row.iri))

# Record each superclass ilx and copy the superclass's own existing ids onto
# the subclass record. This reads data[...]['existing_ids'], so it relies on
# the existing_ids loop above having run first.
for row in superclasses.itertuples():
    data[row.term_ilx]['superclass_ilx'].add(row.superclass_ilx)
    superclass_exs = data[row.superclass_ilx]['existing_ids']
    for superclass_ex in superclass_exs:
        data[row.term_ilx]['superclass_existing_ids'].add(superclass_ex)

# Each relationship is recorded on both participating terms, with the
# owning term placed first in the triple.
for row in relationships.itertuples():
    data[row.term1_ilx]['relationship'].add(
        (row.term1_ilx, row.relationship_ilx, row.term2_ilx))
    data[row.term2_ilx]['relationship'].add(
        (row.term2_ilx, row.relationship_ilx, row.term1_ilx))

# One DataFrame row per ilx record; the frame is then handed to create_pickle
# together with `output`.
df = pd.DataFrame.from_records([record for record in data.values()])
create_pickle(df, output)
示例#10
0
    relationship_ilx_uri = '/'.join([ilx_uri_base, row.relationship_ilx])

    graph.bind(prefix, relationship_ilx_uri)

    relationship_ilx_uri = URIRef(relationship_ilx_uri)

    term1_ilx_uri = '/'.join([ilx_uri_base, row.term1_ilx])
    term1_ilx_uri = URIRef(term1_ilx_uri)
    if not in_sanity_check.get(term1_ilx_uri):
        print('relationships', term1_ilx_uri)

    term2_ilx_uri = '/'.join([ilx_uri_base, row.term2_ilx])
    term2_ilx_uri = URIRef(term2_ilx_uri)
    if not in_sanity_check.get(term2_ilx_uri):
        print('relationships', term2_ilx_uri)

    graph.add((term1_ilx_uri, relationship_ilx_uri, term2_ilx_uri))
    graph.add((term2_ilx_uri, relationship_ilx_uri, term1_ilx_uri))
print('=== relationship triples complete ===')

# Write the finished graph out in Turtle format under the user's home
# directory.
graph.serialize(destination=str(p.home() /
                                'Dropbox/interlex_backups/InterLex.ttl'),
                format='turtle')
# NOTE(review): this destination literal ends with '/' (a directory
# component, no file name), unlike the InterLex.ttl target above — confirm
# the intended output file.
graph.serialize(destination=str(
    p.home() /
    'Dropbox/interlex_backups/SciGraph/SciGraph-core/src/test/resources/ontologies/'
),
                format='turtle')
# The graph object itself is also passed to create_pickle with a
# .graph.pickle destination.
create_pickle(graph,
              p.home() / 'Dropbox/interlex_backups/InterLex.graph.pickle')
示例#11
0
from pathlib import Path as p
from ilxutils.interlex_sql import IlxSql
from ilxutils.tools import create_pickle
import os

sql = IlxSql(db_url=os.environ.get('SCICRUNCH_DB_URL_PRODUCTION'))

# One entry per table to back up, processed in order:
# (fetcher, pickle path relative to the home directory, completion message).
backup_steps = (
    (sql.get_terms,
     'Dropbox/interlex_backups/ilx_db_terms_backup.pickle',
     '=== terms backup complete ==='),
    (sql.get_annotations,
     'Dropbox/interlex_backups/ilx_db_annos_backup.pickle',
     '=== annotations backup complete ==='),
    (sql.get_existing_ids,
     'Dropbox/interlex_backups/ilx_db_ex_backup.pickle',
     '=== existing ids backup complete ==='),
    (sql.get_synonyms,
     'Dropbox/interlex_backups/ilx_db_synonyms_backup.pickle',
     '=== synonyms backup complete ==='),
)

for fetch, relative_path, done_message in backup_steps:
    table = fetch()
    create_pickle(table, p.home() / relative_path)
    print(done_message)
    # Drop the script's reference to the table before fetching the next one.
    del table