def setUpClass(cls):
    """ Make sure we have some sort of schema and data in DB, only done once """
    super(test_concept_Base, cls).setUpClass()

    global client, session
    # TODO this is not neat - this is basically emulating a constructor/destructor operation using globals
    client = GraknClient("localhost:48555")
    keyspace = "test_" + str(uuid.uuid4()).replace("-", "_")[:8]
    session = client.session(keyspace)
    # temp tx to set up DB, don't save it
    tx = session.transaction().write()
    try:
        # define parentship roles to test against
        tx.query(
            "define "
            "parent sub role; "
            "child sub role; "
            "mother sub role; "
            "son sub role; "
            "person sub entity, has age, has gender, plays parent, plays child, plays mother, plays son; "
            "age sub attribute, datatype long; "
            "gender sub attribute, datatype string; "
            "parentship sub relation, relates parent, relates child, relates mother, relates son;"
        )
    except GraknError as ce:
        print(ce)

    answers = list(tx.query("match $x isa person, has age 20; get;"))
    if len(answers) == 0:
        tx.query("insert $x isa person, has age 20;")
    tx.commit()
def test_client_session_close(self):
    client = GraknClient('localhost:48555')
    a_session = client.session('test')
    a_session.close()
    with self.assertRaises(GraknError):
        a_session.transaction().read()
    client.close()
def __init__(self):
    # Load configuration (SafeLoader avoids executing arbitrary YAML tags)
    self.config = yaml.load(
        open(os.path.dirname(__file__) + '/config.yml'),
        Loader=yaml.SafeLoader)

    # Initialize Grakn client
    self.grakn = GraknClient(
        uri=self.config['grakn']['hostname'] + ':' + str(self.config['grakn']['port']))
    self.session = self.grakn.session(keyspace='grakn')

    # Initialize ElasticSearch client
    self.elasticsearch = Elasticsearch(
        [{'host': self.config['elasticsearch']['hostname'],
          'port': self.config['elasticsearch']['port']}],
        timeout=30)

    # Delete current indexes
    self.elasticsearch.indices.delete(index='stix-observables', ignore=[400, 404])
    self.elasticsearch.indices.delete(index='external-references', ignore=[400, 404])
    self.elasticsearch.indices.delete(index='stix-domain-entities', ignore=[400, 404])
    self.elasticsearch.indices.delete(index='stix-relations', ignore=[400, 404])

    # Create new indexes
    self.elasticsearch.indices.create(
        index='stix-observables', ignore=400,
        body={'settings': {'index': {'max_result_window': 100000}}})
    self.elasticsearch.indices.create(
        index='external-references', ignore=400,
        body={'settings': {'index': {'max_result_window': 100000}}})
    self.elasticsearch.indices.create(
        index='stix-domain-entities', ignore=400,
        body={'settings': {'index': {'max_result_window': 100000}}})
    self.elasticsearch.indices.create(
        index='stix-relations', ignore=400,
        body={'settings': {'index': {'max_result_window': 100000}}})
def proteinAtlasMigrator(uri, keyspace, num, num_threads, ctn):
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    if num != 0:
        print(' ')
        print('Opening HPA dataset...')
        print(' ')
        with open('../biograkn-covid/Dataset/HumanProteinAtlas/normal_tissue.tsv',
                  'rt', encoding='utf-8') as csvfile:
            csvreader = csv.reader(csvfile, delimiter='\t')  # tab-separated (.tsv) input
            raw_file = []
            n = 0
            for row in csvreader:
                n = n + 1
                if n != 1:  # skip the header row
                    d = {}
                    d['ensembl-gene-id'] = row[0]
                    d['gene-symbol'] = row[1]
                    d['tissue'] = row[2]
                    d['expression-value'] = row[4]
                    d['expression-value-reliability'] = row[5]
                    raw_file.append(d)

        tissue = []
        for r in raw_file[:num]:
            tissue.append(r['tissue'])
        tissue = list(set(tissue))

        insertTissue(tissue, session, num_threads)
        insertEnsemblId(raw_file, session, num_threads, ctn)
        insertGeneTissue(raw_file, session, num_threads, ctn)

    # Close the connection once the migration is done
    session.close()
    client.close()
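# A minimal usage sketch for proteinAtlasMigrator; the uri, keyspace, row
# budget (num), thread count, and commit batch size (ctn) below are assumed
# values for illustration, not defaults from the original project.
if __name__ == "__main__":
    proteinAtlasMigrator(uri="localhost:48555", keyspace="biograkn_covid",
                         num=1000, num_threads=8, ctn=50)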
def go_test(val_graphs, val_ge_split, reload_fle, **kwargs):
    # Re-open a session, in case it was closed after training
    client = GraknClient(uri=URI)
    session = client.session(keyspace=KEYSPACE)

    # Run the pipeline with the prepared graphs
    ge_graphs, solveds_tr, solveds_ge = pipeline(graphs=val_graphs,
                                                 tr_ge_split=val_ge_split,
                                                 do_test=True,
                                                 save_fle="",
                                                 reload_fle=reload_fle,
                                                 **kwargs)

    with session.transaction().write() as tx:
        # Write predictions to Grakn with the learned probabilities
        write_predictions_to_grakn(ge_graphs, tx)

    # Close the session and client once the write/insert queries are validated
    session.close()
    client.close()

    validation_evals = [solveds_tr, solveds_ge]
    return ge_graphs, validation_evals
def test_match_query(self):
    client = GraknClient("localhost:48555")
    session = client.session("define_schema")
    with session.transaction().read() as tx:
        tx.query("match $s sub thing; get;")
    session.close()
    client.close()
def dgidbMigrator(uri, keyspace, num_dr, num_int, num_threads, ctn):
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    insertDrugs(uri, keyspace, num_dr, num_threads, ctn, session)
    insertInteractions(uri, keyspace, num_int, num_threads, ctn, session)
    session.close()
    client.close()
def diagnosis_example(num_graphs=200,
                      num_processing_steps_tr=5,
                      num_processing_steps_ge=5,
                      num_training_iterations=1000,
                      keyspace=KEYSPACE,
                      uri=URI):
    """
    Run the diagnosis example from start to finish, including traceably
    ingesting predictions back into Grakn

    Args:
        num_graphs: Number of graphs to use for training and testing combined
        num_processing_steps_tr: The number of message-passing steps for training
        num_processing_steps_ge: The number of message-passing steps for testing
        num_training_iterations: The number of training epochs
        keyspace: The name of the keyspace to retrieve example subgraphs from
        uri: The uri of the running Grakn instance

    Returns:
        Final accuracies for training and for testing
    """
    tr_ge_split = int(num_graphs * 0.5)

    generate_example_graphs(num_graphs, keyspace=keyspace, uri=uri)

    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)

    graphs = create_concept_graphs(list(range(num_graphs)), session)

    with session.transaction().read() as tx:
        # Change the terminology here onwards from thing -> node and role -> edge
        node_types = get_thing_types(tx)
        for el in TYPES_TO_IGNORE:
            node_types.remove(el)

        edge_types = get_role_types(tx)
        for el in ROLES_TO_IGNORE:
            edge_types.remove(el)

        print(f'Found node types: {node_types}')
        print(f'Found edge types: {edge_types}')

    ge_graphs, solveds_tr, solveds_ge = pipeline(graphs,
                                                 tr_ge_split,
                                                 node_types,
                                                 edge_types,
                                                 num_processing_steps_tr=num_processing_steps_tr,
                                                 num_processing_steps_ge=num_processing_steps_ge,
                                                 num_training_iterations=num_training_iterations,
                                                 continuous_attributes=CONTINUOUS_ATTRIBUTES,
                                                 categorical_attributes=CATEGORICAL_ATTRIBUTES,
                                                 output_dir=f"./events/{time.time()}/")

    with session.transaction().write() as tx:
        write_predictions_to_grakn(ge_graphs, tx)

    session.close()
    client.close()

    return solveds_tr, solveds_ge
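# A minimal sketch of running the example end to end; it assumes a Grakn server
# is reachable at the module-level URI and that the KEYSPACE default exists.
if __name__ == "__main__":
    solveds_tr, solveds_ge = diagnosis_example(num_graphs=200,
                                               num_training_iterations=1000)
    print(f"Training solved fractions: {solveds_tr}")
    print(f"Generalisation solved fractions: {solveds_ge}")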
def test_define_schema(self):
    client = GraknClient("localhost:48555")
    session = client.session("define_schema")
    with session.transaction().write() as tx:
        tx.query("define person sub entity, has name; name sub attribute, datatype string;")
        tx.commit()
    session.close()
    client.close()
def reactomeMigrator(uri, keyspace, num_path, num_threads, ctn):
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    pathway_associations = filterHomoSapiens(num_path)
    insertPathways(uri, keyspace, num_threads, ctn, session, pathway_associations)
    insertPathwayInteractions(uri, keyspace, num_threads, ctn, session, pathway_associations)
    session.close()
    client.close()
def disgenetMigrator(uri, keyspace, num, num_threads, ctn):
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    batches_pr = []
    if num != 0:
        print(' ')
        print('Opening Disgenet dataset...')
        print(' ')
        with open('../biograkn-covid/Dataset/Disgenet/all_gene_disease_associations.tsv',
                  'rt', encoding='utf-8') as csvfile:
            csvreader = csv.reader(csvfile, delimiter='\t')  # tab-separated (.tsv) input
            raw_file = []
            n = 0
            for row in csvreader:
                n = n + 1
                if n != 1:  # skip the header row
                    raw_file.append(row)

        disgenet = []
        for i in raw_file[:num]:
            data = {}
            data['entrez-id'] = i[0].strip()
            data['gene-symbol'] = i[1]
            data['disease-id'] = i[4]
            data['disease-name'] = i[5]
            data['disgenet-score'] = float(i[9])
            disgenet.append(data)

        insertDiseases(disgenet, session, num_threads, ctn)

        counter = 0
        pool = ThreadPool(num_threads)
        batches = []
        for q in disgenet:
            counter = counter + 1
            graql = f"""match $g isa gene, has gene-symbol "{q['gene-symbol']}", has entrez-id "{q['entrez-id']}";
$d isa disease, has disease-id "{q['disease-id']}", has disease-name "{q['disease-name']}";
insert $r (associated-gene: $g, associated-disease: $d) isa gene-disease-association, has disgenet-score {q['disgenet-score']};"""
            batches.append(graql)
            del graql
            if counter % ctn == 0:
                batches_pr.append(batches)
                batches = []
        batches_pr.append(batches)

        pool.map(partial(batch_job, session), batches_pr)
        pool.close()
        pool.join()
        print('.....')
        print('Finished migrating Disgenet.')
        print('.....')
    session.close()
    client.close()
def create_grakn_connection():
    global client, session, transaction, connection_to_grakn_exists

    if not connection_to_grakn_exists:
        client = GraknClient(uri="localhost:48555")
        session = client.session(keyspace=keyspace_name)
        ## create a transaction to talk to the Grakn server
        transaction = session.transaction().read()
        connection_to_grakn_exists = True
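# Usage sketch for create_grakn_connection; keyspace_name and the module-level
# flag are assumed to be defined elsewhere in the original module, so the
# values below are assumptions for illustration only.
# keyspace_name = "phone_calls"          # assumed module-level config
# connection_to_grakn_exists = False     # assumed module-level flag
# create_grakn_connection()
# answers = transaction.query("match $x isa thing; get; limit 1;")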
def setUp(self):
    self._client = GraknClient(uri="localhost:48555")
    self._session = self._client.session(keyspace=keyspace_name)
    with open('schemas/phone-calls-schema.gql', 'r') as schema:
        define_query = schema.read()
        with self._session.transaction().write() as transaction:
            transaction.query(define_query)
            transaction.commit()
            print("Loaded the " + keyspace_name + " schema")
def __init__(self):
    # Initialize Grakn client
    self.grakn = GraknClient(uri='localhost:48555')
    self.session = self.grakn.session(keyspace='grakn')

    # Initialize ElasticSearch client
    self.elasticsearch = Elasticsearch([{
        'host': 'localhost',
        'port': 9200
    }], timeout=30)

    # Delete current indexes
    self.elasticsearch.indices.delete(index='stix-observables', ignore=[400, 404])
    self.elasticsearch.indices.delete(index='external-references', ignore=[400, 404])
    self.elasticsearch.indices.delete(index='stix-domain-entities', ignore=[400, 404])
    self.elasticsearch.indices.delete(index='stix-relations', ignore=[400, 404])

    # Create new indexes
    self.elasticsearch.indices.create(
        index='stix-observables',
        ignore=400,
        body={'settings': {'index': {'max_result_window': 100000}}})
    self.elasticsearch.indices.create(
        index='external-references',
        ignore=400,
        body={'settings': {'index': {'max_result_window': 100000}}})
    self.elasticsearch.indices.create(
        index='stix-domain-entities',
        ignore=400,
        body={'settings': {'index': {'max_result_window': 100000}}})
    self.elasticsearch.indices.create(
        index='stix-relations',
        ignore=400,
        body={'settings': {'index': {'max_result_window': 100000}}})
def __init__(self):
    # Initialize Grakn client
    self.grakn = GraknClient(uri='localhost:48555')
    self.session = self.grakn.session(keyspace='grakn')

    # Initialize ElasticSearch client
    self.elasticsearch = Elasticsearch([{
        'host': 'localhost',
        'port': 9200
    }], timeout=30)
def __init__(self):
    # Initialize Grakn client
    self.grakn = GraknClient(uri='localhost:48555')
    self.session = self.grakn.session(keyspace='grakn')

    # Open the dump file
    self.dump_file = open('./dump.gql', 'w')

    # Entities
    self.entities = {}

    # Relations
    self.relations = {}
def test_insert_query(self):
    client = GraknClient("localhost:48555")
    session = client.session("define_schema")
    with session.transaction().write() as tx:
        tx.query("define person sub entity, has name; name sub attribute, value string;")
        tx.commit()
    with session.transaction().write() as tx:
        tx.query("insert $x isa person, has name \"john\";")
        tx.commit()
    session.close()
    client.close()
def insertSchema(uri, keyspace):
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    print('.....')
    print('Inserting schema...')
    print('.....')
    with open("Schema/biograkn-covid.gql", "r") as graql_file:
        schema = graql_file.read()
    with session.transaction().write() as write_transaction:
        write_transaction.query(schema)
        write_transaction.commit()
    print('.....')
    print('Successfully inserted the schema!')
    print('.....')
    # Close the connection once the schema is committed
    session.close()
    client.close()
def migrate_relationships(uri, keyspace, data: list, ctn, process_id=0):
    '''
    Migrate relations to Grakn\n
    data - table in the form of a list of lists\n
    process_id - process id while running on multiple cores, by default process_id = 0
    '''
    with GraknClient(uri=uri) as client:
        with client.session(keyspace=keyspace) as session:
            counter = 0
            transaction = session.transaction().write()
            for data_entity in data:
                predicate_name = data_entity[1]
                subject_name = data_entity[2]
                object_name = data_entity[3]
                # TODO: add a handler for predicates that have no relation implemented in the mapper
                relation = relationship_mapper(predicate_name)
                pmid = data_entity[0]
                sentence_text = data_entity[4].replace('"', "'")
                match_query = 'match $p isa publication, has paper-id "{}"; $g1 isa gene, has gene-symbol "{}"; $g2 isa gene, has gene-symbol "{}"; '.format(
                    pmid, subject_name, object_name)
                insert_query = 'insert $r ({}: $g1, {}: $g2) isa {}, has sentence-text "{}"; $m (mentioned-genes-relation: $r, mentioning: $p) isa mention, has source "SemMed";'.format(
                    relation["active-role"], relation["passive-role"], relation["relation-name"], sentence_text)
                transaction.query(match_query + insert_query)
                print(match_query + insert_query)
                if counter % ctn == 0:
                    transaction.commit()
                    transaction = session.transaction().write()
                    print("Process {} COMMITTED".format(process_id))
                    print("Process {} ----- {} relations added".format(process_id, counter))
                counter = counter + 1
            transaction.commit()
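# Illustrative call for migrate_relationships. The row layout is inferred from
# the indexing above ([pmid, predicate, subject gene, object gene, sentence]);
# the row values, uri, keyspace, and predicate name are assumptions made up
# for demonstration.
if __name__ == "__main__":
    example_rows = [
        ["12345678", "STIMULATES", "TP53", "MDM2",
         "TP53 stimulates MDM2 expression."],
    ]
    migrate_relationships(uri="localhost:48555", keyspace="semmed",
                          data=example_rows, ctn=50)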
def build_banking_graph(inputs):
    with GraknClient(uri="localhost:48555") as client:
        with client.session(keyspace="banking") as session:
            for input_spec in inputs:  # avoid shadowing the built-in `input`
                print("Loading from [" + input_spec["data_path"] + "] into Grakn ...")
                load_data_into_grakn(input_spec, session)
def get(self):
    args = parser.parse_args(strict=True)
    ApiKey = args['ApiKey']
    thingType = args['thingType']
    thingName = args['thingName']
    has = args['has']
    get = args['get']
    limit = args['limit']

    with GraknClient(uri="localhost:48555") as client:
        with client.session(keyspace="dev_test2") as session:
            with session.transaction().read() as read_transaction:
                match_iterator = read_transaction.query(
                    'match $t isa api_auth, has api_key "' + ApiKey + '", has active $a; get $a; limit 1;')
                answers = match_iterator.collect_concepts()
                for answer in answers:
                    if answer.is_attribute():
                        if answer.value() == "true":
                            # return authenticated
                            pass
                        else:
                            abort(401)
                    else:
                        abort(500, '... ill get back to you on this ...')
                if len(answers) == 0:
                    abort(401)

    return {"thingType": thingType, "thingName": thingName, "has": has, "get": get, "limit": limit}
def get(self,
        kspace,      #### mandatory #### keyspace name ####
        thing,       #### mandatory #### thing to search for => format: thing type = thing name => e.g. 'attribute=name' => match $t isa name ####
        has=" ",     #### not mandatory #### search parameters to either filter or fetch extra data from the database ####
                     #### parameter name => parameter value    => e.g. 'name="Jim"' ####
                     #### parameter name => parameter variable => e.g. 'name=$n' ####
                     #### multiple values are comma separated and types of values can be mixed => e.g. 'name=$n,eyeColor="blue"' ####
        get="$t",    #### not mandatory #### specify variables to fetch data from => e.g. '$t,$n' ####
        limit=100):  #### not mandatory #### fetch quantity limit => used to improve response time of the api ####

    #### parameters ####
    split = has.split(',')
    has = ""
    if split[0] != ' ':
        for hasquery in split:
            has = has + ',has ' + hasquery.split('=')[0] + ' ' + hasquery.split('=')[1]
    thingName = thing.split('=')[1]
    thingType = thing.split('=')[0].lower()

    #### data fetch ####
    jsonobject = json.dumps({"matchedName": thingName, "matchedType": thingType})[:-1] + ', "answers":[ '
    with GraknClient(uri="localhost:48555") as client:
        with client.session(keyspace=kspace) as session:
            with session.transaction().read() as read_transaction:
                match_iterator = read_transaction.query(
                    'match $t isa ' + thingName + ' ' + has + ';get ' + get + ';limit ' + str(limit) + ';')
                answers = match_iterator.collect_concepts()
                jsonobject = jsonobject + builders.objectSwitch(answers)
    jsonobject = jsonobject[:-1] + '}]}'
    return json.loads(jsonobject)
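# Worked example of how the parameters above compose into a Graql query
# (all values are illustrative assumptions):
#   kspace = 'social_network'
#   thing  = 'entity=person'   -> thingType 'entity', thingName 'person'
#   has    = 'name=$n,age=30'  -> ',has name $n,has age 30'
#   get    = '$t,$n'
#   limit  = 10
# producing roughly:
#   match $t isa person ,has name $n,has age 30;get $t,$n;limit 10;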
def main(keyspace='office_ally_patients', dataPath='sample-data/Patient Matching Data.csv'):
    print('Starting database client...')
    with GraknClient(uri='localhost:48555') as client:
        print('Starting database session...')
        with client.session(keyspace=keyspace) as session:
            print('Starting write transaction...')
            with session.transaction().write() as transaction:
                print('Importing data from "{dataPath}"...'.format(dataPath=dataPath))
                with open(dataPath) as csvfile:
                    for row in csv.DictReader(csvfile):
                        print('Inserting:', dict(row))
                        insert_iterator = transaction.query(queryInsertPatientRecord(row))
                transaction.commit()
            print('Closing database session...')  ## Close session
        print('Closing database client...')  ## Close client
    print('Successfully imported.')
def _execute_relation_query(self, query: Text, relation_name: Text) -> List[Dict[Text, Any]]:
    """
    Execute a query that queries for a relation. All attributes of the relation
    and all entities participating in the relation are part of the result.
    """
    with GraknClient(uri=self.uri) as client:
        with client.session(keyspace=self.keyspace) as session:
            with session.transaction().read() as tx:
                print("Executing Graql Query: " + query)
                result_iter = tx.query(query)

                relations = []
                for concept in result_iter:
                    relation_entity = concept.map().get(relation_name)
                    relation = self._thing_to_dict(relation_entity)

                    for (role_entity, entity_set) in relation_entity.role_players_map().items():
                        role_label = role_entity.label()
                        thing = entity_set.pop()
                        relation[role_label] = self._thing_to_dict(thing)

                    relations.append(relation)

                return relations
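# Hypothetical usage of _execute_relation_query, assuming an instance of the
# enclosing knowledge-base class configured with a uri and keyspace; the class
# name, query, and role names below are illustrative assumptions.
# kb = GraknKnowledgeBase(uri="localhost:48555", keyspace="banking")
# contracts = kb._execute_relation_query(
#     "match $contract (provider: $bank, customer: $person) isa contract; get;",
#     relation_name="contract")
# for contract in contracts:
#     print(contract)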
def init(blast_output_path):
    with GraknClient(uri="localhost:48555") as client:
        with client.session(keyspace="blast") as session:
            print("Connected to the proteins knowledge graph.")
            print("- - - - - - - - - - - - - - - - - - - - -")
            target_sequences = query_target_sequences(session)
            for sequence in target_sequences:
                print("BLASTing for: ", sequence)
                print("- - - - - - - - - - - - - - - - - - - - -")
                print("Waiting for BLAST search to complete. This can take a few minutes.")
                # result_handle = NCBIWWW.qblast(
                #     "blastp",
                #     "nr",
                #     sequence
                # )
                # print("Reading BLAST results")
                # print("- - - - - - - - - - - - - - - - - - - - -")
                # with open('./blast-output.xml', 'w') as output_file:
                #     output_file.write(result_handle.read())
                blast_record = NCBIXML.read(open(blast_output_path))
                print("Inserting BLAST results into the proteins knowledge graph.")
                print("- - - - - - - - - - - - - - - - - - - - -")
                insert_new_proteins_n_alignments(session, sequence, blast_record)
def migrate_journals(uri, keyspace, journal_names: list, ctn, process_id=0):
    '''
    Migrate journals to Grakn\n
    journal_names - list of journal names (strings)\n
    process_id - process id while running on multiple cores, by default process_id = 0
    '''
    with GraknClient(uri=uri) as client:
        with client.session(keyspace=keyspace) as session:
            counter = 0
            transaction = session.transaction().write()
            for journal_name in journal_names:
                ## Check if the journal is already in the knowledge base
                try:
                    match_query = 'match $j isa journal, has journal-name "{}"; get;'.format(journal_name)
                    next(transaction.query(match_query))
                except StopIteration:
                    insert_query = 'insert $j isa journal, has journal-name "{}";'.format(journal_name)
                    transaction.query(insert_query)
                if counter % ctn == 0:
                    transaction.commit()
                    transaction = session.transaction().write()
                    print("Process {} COMMITTED".format(process_id))
                    print("Process {} ----- {} journals added".format(process_id, counter))
                counter = counter + 1
            transaction.commit()
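# Minimal usage sketch for migrate_journals; the journal names, uri, keyspace,
# and commit batch size are assumptions for illustration.
if __name__ == "__main__":
    migrate_journals(uri="localhost:48555", keyspace="semmed",
                     journal_names=["Nature", "The Lancet", "Cell"], ctn=100)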
def init(shouldHalt):
    root = tk.Tk()  # Build the Tkinter application
    with GraknClient(uri="localhost:48555") as client:
        with client.session(keyspace="tube_network") as session:
            tube_gui = TubeGui(session, root)
            if shouldHalt:
                root.mainloop()
def define_schema(host='35.234.48.188', port=48555):
    schema = """
        define

        edge sub relation,
            relates head,
            relates tail,
            has edge_id,
            has edge_type;

        node sub entity,
            plays head,
            plays tail,
            has name,
            has supply_info_json;

        edge_id sub attribute, value long;
        edge_type sub attribute, value string;
        name sub attribute, value string;
        supply_info_json sub attribute, value string;
    """
    with GraknClient(uri=f"{host}:{port}") as client:
        with client.session(keyspace="conceptnet") as session:
            with session.transaction().write() as transaction:
                transaction.query(schema)
                transaction.commit()
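# A sketch of applying the schema and spot-checking it afterwards; the host and
# port here are assumptions for a local server (the function's defaults point
# at a remote instance).
if __name__ == "__main__":
    define_schema(host="localhost", port=48555)
    with GraknClient(uri="localhost:48555") as client:
        with client.session(keyspace="conceptnet") as session:
            with session.transaction().read() as tx:
                # Expect `node` among the entity subtypes defined above
                print(list(tx.query("match $x sub entity; get;")))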
def __init__(self):
    # Load configuration (SafeLoader avoids executing arbitrary YAML tags)
    self.config = yaml.load(
        open(os.path.dirname(__file__) + '/config.yml'),
        Loader=yaml.SafeLoader)

    # Initialize Grakn client
    self.grakn = GraknClient(
        uri=self.config['grakn']['hostname'] + ':' + str(self.config['grakn']['port']))
    self.session = self.grakn.session(keyspace='grakn')

    # Open the dump file
    self.dump_file = open('./dump.gql', 'w')

    # Entities
    self.entities = {}

    # Relations
    self.relations = {}
def cord_ner_migrator(uri, keyspace, num_ner, num_threads, ctn):
    client = GraknClient(uri=uri)
    session = client.session(keyspace=keyspace)
    print('.....')
    print('Opening CORD NER file.')
    print('.....')
    with open('../biograkn-covid/Dataset/CORD_NER/CORD-NER-full.json', "r") as f:
        # The file is a stream of JSON objects; wrap it in brackets to parse it as a list
        data = json.loads("[" + f.read().replace("}\n{", "},\n{") + "]")
        data = data[:num_ner]

    insert_authors(data, num_threads, ctn, session)
    insert_journals(data, num_threads, ctn, session)
    insert_publications_journals(data, num_threads, ctn, session)
    insert_publications_with_authors(data, num_threads, 1, session)
    insert_entities_pub(data, num_threads, ctn, session)

    session.close()
    client.close()