def re_create_config(event, context=None):
    """
    End-point: (Temporary) Re-creates the global configuration and returns it.

    Meant to be called after the configuration has been changed
    programmatically. The configuration is (re)built through
    ``utility.createOrUpdateGlobalConfig`` and then read back from the
    ``config_db`` index.

    Args:
        event: Request event; not read by this handler.
        context: Optional invocation context; not read by this handler.

    Returns:
        dict: Response with ``statusCode`` 200, the module-level ``headers``
        and a JSON ``body``. The body is the ``_source`` of the first hit in
        the configuration index, or ``[]`` when the search reports no hits.
    """
    es = getESConn()
    utility.createOrUpdateGlobalConfig(es, config_db=config_db)
    # Fixed 0.3 sec pause to allow time for the created index to be ready.
    # NOTE(review): a fixed sleep does not guarantee readiness - confirm it is
    # sufficient for the deployment.
    time.sleep(0.3)

    # The configuration index has 1 document, so only the first hit is used.
    match_all_query = {"query": retrieve.MatchAll()}
    res = es.search(index=config_db, body=match_all_query)
    hits = res['hits']
    if hits['total']['value'] > 0:
        config = hits['hits'][0]['_source']
    else:
        config = []

    return {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(config),
    }
def get_config(event, context=None):
    """
    End-point: Retrieves configuration.

    When the ``config_db`` index does not exist yet, it is created first via
    ``utility.createOrUpdateGlobalConfig`` and the handler pauses briefly
    before reading from it.

    Args:
        event: Request event; not read by this handler.
        context: Optional invocation context; not read by this handler.

    Returns:
        dict: Response with ``statusCode`` 200, the module-level ``headers``
        and a JSON ``body``. The body is the ``_source`` of the first hit in
        the configuration index, or ``[]`` when the search reports no hits.
    """
    es = getESConn()

    index_missing = not es.indices.exists(index=config_db)
    if index_missing:
        # Create the config db on first use.
        utility.createOrUpdateGlobalConfig(es, config_db=config_db)
        # 0.3 sec wait to allow time for the created index to be ready.
        time.sleep(0.3)

    # The configuration index has 1 document, so only the first hit is used.
    match_all_query = {"query": retrieve.MatchAll()}
    res = es.search(index=config_db, body=match_all_query)
    hits = res['hits']
    if hits['total']['value'] > 0:
        config = hits['hits'][0]['_source']
    else:
        config = []

    return {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(config),
    }
def all_projects(event, context=None):
    """
    End-point: Retrieves all projects. Each project is separate CBR application.

    Runs a match-all search against the ``projects_db`` index (when it
    exists) and returns every project document with its Elasticsearch
    document id attached under ``id__``.

    Args:
        event: Request event; not read by this handler.
        context: Optional invocation context; not read by this handler.

    Returns:
        dict: Response with ``statusCode`` 200, the module-level ``headers``
        and a JSON ``body`` holding a list of projects (empty when the index
        does not exist or contains no documents).
    """
    # Elasticsearch returns only 10 hits when no "size" is given, which would
    # silently truncate the project list. 10000 is the default upper bound
    # (index.max_result_window) for a single search request.
    max_projects = 10000

    projects = []
    es = getESConn()
    if es.indices.exists(index=projects_db):  # only query once the index exists
        query = {"query": retrieve.MatchAll(), "size": max_projects}
        res = es.search(index=projects_db, body=query)
        for hit in res['hits']['hits']:
            project = hit['_source']
            project['id__'] = hit['_id']  # expose the document id with the project fields
            projects.append(project)

    return {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(projects),
    }
def cbr_retrieve(event, context=None):
    """
    End-point: Completes the Retrieve step of the CBR cycle.

    The JSON request body (``event['body']``) is read for:
      * ``data``: list of query features. Each feature is read for
        ``name``, ``value``, ``weight``, ``similarity``, ``unknown`` and
        ``strategy``.
      * ``project``: the project description; when absent, the project is
        looked up in ``projects_db`` by ``projectId``.
      * ``globalSim``: must be present, but its value is not used here.
      * ``topk``: number of cases to retrieve.

    Returns:
        dict: Response with ``statusCode`` 200, the module-level ``headers``
        and a JSON ``body`` containing ``recommended`` (the top case with
        known values copied in and reuse strategies applied), ``bestK`` (the
        retrieved cases, each with ``score__``), ``retrieveTime`` (elapsed
        ``timer()`` time) and ``esTime`` (``took`` from the search response).

    NOTE(review): a second ``cbr_retrieve`` is defined later in this file and
    rebinds the name, so this definition is shadowed at import time.
    """
    started = timer()
    result = {'recommended': {}, 'bestK': []}
    es = getESConn()

    params = json.loads(event['body'])
    query_features = params['data']

    proj = params.get('project', None)
    if proj is None:
        # No inline project supplied: resolve it from the projects index by id.
        project_id = params.get('projectId', None)
        proj = utility.getByUniqueField(es, projects_db, "_id", project_id)
    proj_attributes = proj['attributes']
    _global_sim = params['globalSim']  # read so a missing key still fails; value is unused
    top_k = params['topk']

    def has_value(feature):
        # A feature counts as supplied when its value is neither null nor "".
        return ('value' in feature) and feature['value'] is not None and "" != feature['value']

    # Build a bool/should query with one scoring clause per usable feature.
    should_clauses = []
    query = {"query": {"bool": {"should": should_clauses}}, "size": int(top_k)}
    for feature in query_features:
        if not has_value(feature):
            continue
        if int(feature.get('weight', 0)) <= 0:
            continue
        if feature['similarity'] == "None":
            continue

        field = feature['name']
        similarity_type = feature['similarity']
        options = retrieve.get_attribute_by_name(proj['attributes'], field).get('options', None)
        # Use lowercase when the field is specified as case-insensitive.
        if similarity_type == 'EqualIgnoreCase':
            value = feature['value'].lower()
        else:
            value = feature['value']
        weight = feature['weight']
        should_clauses.append(
            retrieve.getQueryFunction(field, value, weight, similarity_type, options))

    if not should_clauses:
        # No feature produced a clause: retrieve everything (up to k).
        should_clauses.append(retrieve.MatchAll())

    # Perform retrieval.
    best_k = result['bestK']
    res = es.search(index=proj['casebase'], body=query)
    for hit in res['hits']['hits']:
        case = hit['_source']
        case.pop('hash__', None)  # remove hash field and value
        case = retrieve.remove_vector_fields(proj_attributes, case)  # remove vectors from Semantic USE fields
        if not best_k:
            # The first hit seeds the recommendation (copied before score__ is added).
            result['recommended'] = copy.deepcopy(case)
        # NOTE: attaching hit['_id'] as 'id__' is disabled in this handler.
        case['score__'] = hit['_score']  # removed during an update operation
        best_k.append(case)

    # Recommend: keep the known values supplied in the query and apply the
    # chosen reuse strategy to attributes flagged as unknown.
    if best_k:
        recommended = result['recommended']
        for feature in query_features:
            if not feature['unknown'] and has_value(feature):
                recommended[feature['name']] = feature['value']  # copy known values

            if (feature.get('similarity') is not None and feature['unknown']
                    and feature['strategy'] != "Best Match"):
                strategy = feature['strategy']
                if strategy in ("Maximum", "Minimum", "Mean", "Median", "Mode"):
                    values = [case[feature['name']] for case in best_k]
                    if strategy == "Maximum":
                        recommended[feature['name']] = max(values)
                    elif strategy == "Minimum":
                        recommended[feature['name']] = min(values)
                    elif strategy == "Mean":
                        recommended[feature['name']] = np.mean(values)
                    elif strategy == "Median":
                        recommended[feature['name']] = np.median(values)
                    else:  # "Mode"
                        recommended[feature['name']] = statistics.mode(values)

    result['retrieveTime'] = timer() - started
    result['esTime'] = res['took']
    return {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(result),
    }
def cbr_retrieve(event, context=None):
    """
    End-point: Completes the Retrieve step of the CBR cycle.

    The JSON request body (``event['body']``) is read for:
      * ``data``: list of query features. Each feature is read for
        ``field``, ``value``, ``weight``, ``similarityType``, ``unknown``
        and ``strategy``.
      * ``project``: the project description; its ``casebase`` entry names
        the index that is searched.
      * ``globalSim``: must be present, but its value is not used here.
      * ``topk``: number of cases to retrieve.

    Returns:
        dict: Response with ``statusCode`` 200, the module-level ``headers``
        and a JSON ``body`` containing ``recommended`` (the top case with
        known values copied in and reuse strategies applied), ``bestK`` (the
        retrieved cases, each with ``score__``), ``retrieveTime`` (elapsed
        ``timer()`` time) and ``esTime`` (``took`` from the search response).

    NOTE(review): an earlier ``cbr_retrieve`` exists in this file; being
    defined later, this definition is the one bound to the name.
    """
    started = timer()
    result = {'recommended': {}, 'bestK': []}

    params = json.loads(event['body'])
    query_features = params['data']
    proj = params['project']
    _global_sim = params['globalSim']  # read so a missing key still fails; value is unused
    top_k = params['topk']

    def has_value(feature):
        # A feature counts as supplied when its value is neither null nor "".
        return ('value' in feature) and feature['value'] is not None and "" != feature['value']

    # Build a bool/should query with one scoring clause per usable feature.
    should_clauses = []
    query = {'query': {'bool': {'should': should_clauses}}, 'size': int(top_k)}
    for feature in query_features:
        if not has_value(feature):
            continue
        if int(feature['weight']) <= 0:
            continue
        if feature['similarityType'] == "None":
            continue

        field = feature['field']
        value = feature['value']
        weight = feature['weight']
        similarity_type = feature['similarityType']
        should_clauses.append(
            retrieve.getQueryFunction(field, value, weight, similarity_type))

    if not should_clauses:
        # No feature produced a clause: retrieve everything (up to k).
        should_clauses.append(retrieve.MatchAll())

    # Perform retrieval.
    best_k = result['bestK']
    es = getESConn()
    res = es.search(index=proj['casebase'], body=query)
    for hit in res['hits']['hits']:
        case = hit['_source']
        case.pop('hash__', None)  # remove hash field and value
        if not best_k:
            # The first hit seeds the recommendation (copied before score__ is added).
            result['recommended'] = copy.deepcopy(case)
        # NOTE: attaching hit['_id'] as 'id__' is disabled in this handler.
        case['score__'] = hit['_score']  # removed during an update operation
        best_k.append(case)

    # Recommend: keep the known values supplied in the query and apply the
    # chosen reuse strategy to attributes flagged as unknown.
    if best_k:
        recommended = result['recommended']
        for feature in query_features:
            if not feature['unknown'] and has_value(feature):
                recommended[feature['field']] = feature['value']  # copy known values

            if (feature['similarityType'] != "None" and feature['unknown']
                    and feature['strategy'] != "Best Match"):
                strategy = feature['strategy']
                if strategy in ("Maximum", "Minimum", "Mean", "Median", "Mode"):
                    values = [case[feature['field']] for case in best_k]
                    if strategy == "Maximum":
                        recommended[feature['field']] = max(values)
                    elif strategy == "Minimum":
                        recommended[feature['field']] = min(values)
                    elif strategy == "Mean":
                        recommended[feature['field']] = np.mean(values)
                    elif strategy == "Median":
                        recommended[feature['field']] = np.median(values)
                    else:  # "Mode"
                        recommended[feature['field']] = statistics.mode(values)

    result['retrieveTime'] = timer() - started
    result['esTime'] = res['took']
    return {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(result),
    }