def xtest_property_coalescer_perf_test():
    """Check that property coalescing of the perf-test input finishes in time.

    Loads ``InputJson_1.1/EdgeIDAsStrAndPerfTest.json`` from the directory
    containing this file, runs ``coalesce(..., method='property')`` on its
    ``message`` and asserts that loading plus coalescing took less than
    120 seconds.

    NOTE(review): the ``x`` prefix presumably keeps pytest from collecting
    this function -- confirm against the project's pytest configuration.
    """
    from src.single_node_coalescer import coalesce
    import os
    import json
    import datetime

    # get a timestamp
    t1 = datetime.datetime.now()

    # get the path to the test file
    test_filename = os.path.join(
        os.path.abspath(os.path.dirname(__file__)),
        'InputJson_1.1',
        'EdgeIDAsStrAndPerfTest.json',
    )

    # open the file and load it
    with open(test_filename, 'r') as tf:
        incoming = json.load(tf)['message']

    # call function that does property coalesce; only the run time matters
    coalesce(incoming, method='property')

    # get the amount of time it took
    diff = datetime.datetime.now() - t1

    # total_seconds() measures the whole interval. timedelta.seconds is only
    # the sub-day component, so it would wrap around after 24 hours.
    assert diff.total_seconds() < 120
def xtest_killer_graphbased():
    """Run graph coalescing over ``InputJson_1.2/killer.json`` and print the result count.

    There are no assertions: the function only fails if loading or
    ``snc.coalesce`` raises.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    input_path = os.path.join(here, 'InputJson_1.2/killer.json')

    with open(input_path, 'r') as handle:
        message = json.load(handle)['message']

    coalesced = snc.coalesce(message, method='graph')
    print(len(coalesced['results']))
def test_big_graphbased():
    """Smoke-test graph coalescing on ``bigger_new.json``.

    The input is read from ``input_dir`` relative to this file. There are no
    assertions: the test passes as long as loading and ``snc.coalesce`` do
    not raise, and it prints the number of results produced.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    input_path = os.path.join(here, f'{input_dir}/bigger_new.json')

    with open(input_path, 'r') as handle:
        message = json.load(handle)['message']

    coalesced = snc.coalesce(message, method='graph')
    print(len(coalesced['results']))
def xtest_big_graphbased():
    """Graph-coalesce ``workflowb_strider_out.json`` and print the result count.

    The input file is over the GitHub file size limit, so this test has been
    removed from the suite; it is kept because it is useful for profiling.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    input_path = os.path.join(here, f'{input_dir}/workflowb_strider_out.json')

    with open(input_path, 'r') as handle:
        message = json.load(handle)['message']

    coalesced = snc.coalesce(message, method='graph')
    print(len(coalesced['results']))
def test_graph_coalesce():
    """Make sure that results are well formed.

    Loads ``famcov_new.json``, runs ``snc.coalesce`` with ``method='graph'``
    and ``return_original=False``, then checks for every result that:

    * each node binding refers to a knowledge-graph node, and that node has
      a ``name``;
    * each bound edge that was not already in the input knowledge graph has
      attributes that all carry an ``attribute_type_id``, and whose values
      include both ``infores:aragorn`` and ``infores:automat-robokop`` plus
      at least one further value.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    testfilename = os.path.join(dir_path, jsondir, 'famcov_new.json')
    with open(testfilename, 'r') as tf:
        answerset = json.load(tf)['message']

    # Some of these edges are old, we need to know which ones...
    # Snapshot the ids before coalescing so later changes cannot affect it.
    original_edge_ids = set(answerset['knowledge_graph']['edges'])

    # now generate new answers
    newset = snc.coalesce(answerset, method='graph', return_original=False)
    kgnodes = newset['knowledge_graph']['nodes']
    kgedges = newset['knowledge_graph']['edges']
    ac_prov = {'infores:aragorn', 'infores:automat-robokop'}

    for result in newset['results']:
        # We are no longer updating the qgraph, so results are not required
        # to contain an 'extra...' node or edge binding any more.

        # Every node binding should be found somewhere in the kg nodes
        for bindings in result['node_bindings'].values():
            for nb in bindings:
                assert nb['id'] in kgnodes
                # And each of these nodes should have a name
                assert 'name' in kgnodes[nb['id']]

        # Check that the new edges have the provenance we need
        for bindings in result['edge_bindings'].values():
            for eb in bindings:
                edge_id = eb['id']
                # Look the edge up first: every bound edge must be in the kg
                edge = kgedges[edge_id]
                if edge_id in original_edge_ids:
                    continue

                attributes = edge['attributes']
                assert all('attribute_type_id' in a for a in attributes)

                try:
                    values = set(flatten([a['value'] for a in attributes]))
                except Exception as exc:
                    raise AssertionError(
                        f'could not collect attribute values for edge {edge!r}'
                    ) from exc

                assert ac_prov <= values
                assert len(values) > len(ac_prov)
def test_graph_coalesce_strider():
    """Check that every graph-coalesced result has an 'extra' node binding.

    Loads ``strider_relay_mouse.json``, runs ``snc.coalesce`` with
    ``method='graph'`` and ``return_original=False``, and asserts that each
    result has at least one node-binding key starting with ``'extra'``.
    """
    here = os.path.dirname(os.path.realpath(__file__))
    input_path = os.path.join(here, jsondir, 'strider_relay_mouse.json')

    with open(input_path, 'r') as handle:
        message = json.load(handle)['message']

    coalesced = snc.coalesce(message, method='graph', return_original=False)

    for result in coalesced['results']:
        binding_keys = result['node_bindings']
        assert any(key.startswith('extra') for key in binding_keys)
def xtest_property_coalescer_why_no_coalesce():
    """Run ``coalesce(..., method='all')`` on ``test_property.json`` and print the result count.

    The input is ``InputJson_1.1/test_property.json`` in this file's
    directory. There are no assertions; the output is for manual inspection.
    """
    import datetime
    import json
    import os
    from src.single_node_coalescer import coalesce

    # locate the input next to this file
    here = os.path.abspath(os.path.dirname(__file__))
    input_path = os.path.join(here, 'InputJson_1.1', 'test_property.json')

    # read the message out of the file
    with open(input_path, 'r') as handle:
        message = json.load(handle)['message']

    # run every coalescing method
    result = coalesce(message, method='all')

    print(len(result['results']))
    print('hi')
def test_missing_node_norm():
    """Check that graph coalescing of ``graph_named_thing_issue.json`` completes in time.

    Loads the file from ``jsondir`` next to this file, runs
    ``coalesce(..., method='graph')`` on its ``message`` and asserts that
    loading plus coalescing took less than 60 seconds. The coalesced output
    itself is not inspected.
    """
    from src.single_node_coalescer import coalesce
    import datetime

    # get a timestamp
    t1 = datetime.datetime.now()

    # get the path to the test file
    test_filename = os.path.join(
        os.path.abspath(os.path.dirname(__file__)),
        jsondir,
        'graph_named_thing_issue.json',
    )

    # open the file and load it
    with open(test_filename, 'r') as tf:
        incoming = json.load(tf)['message']

    # call function that does graph coalesce; only the run time matters
    coalesce(incoming, method='graph')

    # get the amount of time it took
    diff = datetime.datetime.now() - t1

    # total_seconds() measures the whole interval. timedelta.seconds is only
    # the sub-day component, so it would wrap around after 24 hours.
    assert diff.total_seconds() < 60
async def coalesce_handler(request: PDResponse, method: MethodName):
    """ Answer coalesce operations. You may choose all, property, graph. """
    # NOTE(review): the docstring above is kept verbatim because web
    # frameworks may publish handler docstrings as the endpoint description
    # -- confirm how this handler is registered.
    #
    # Outcomes (the response body is always the request payload as a dict):
    #   200 - no results to coalesce (legal, nothing to do), or coalescing
    #         succeeded and 'message' was replaced with the coalesced one
    #   422 - there are results but no knowledge graph to work from
    #   500 - coalesce() raised; the exception is logged and 'message' is
    #         left as it was received
    # Appending entries to 'logs' and normalizing the output are currently
    # disabled.

    # work on a plain dict version of the request
    payload = request.dict()

    # make sure there is a log list to hand back
    if payload.get('logs') is None:
        payload['logs'] = []

    # timestamps cause json serialization issues in the call to the
    # normalizer, so store them as strings
    for entry in payload['logs']:
        entry['timestamp'] = str(entry['timestamp'])

    message = payload['message']

    def _absent_or_empty(key):
        # True when the message has no usable value under `key`
        return key not in message or message[key] is None or len(message[key]) == 0

    # 0 results is perfectly legal, there's just nothing to do
    if _absent_or_empty('results'):
        logger.error("No results to coalesce")
        return JSONResponse(content=payload, status_code=200)

    # results without a graph cannot be processed
    if _absent_or_empty('knowledge_graph'):
        logger.error("No knowledge graph to coalesce")
        return JSONResponse(content=payload, status_code=422)

    status_code: int = 200
    try:
        # coalesce and put the outcome back into the full trapi message
        payload['message'] = coalesce(message, method=method)
    except Exception as e:
        # report the failure through the status code
        status_code = 500
        logger.exception(f"Exception encountered {e!s}")

    # return the result to the caller
    return JSONResponse(content=payload, status_code=status_code)