Exemplo n.º 1
0
def re_create_config(event, context=None):
  """
  End-point: (Temporary) Rebuilds the global configuration and returns it.

  Calls utility.createOrUpdateGlobalConfig on the configuration index, then
  reads the configuration back with a match-all search. Neither `event` nor
  `context` is used.

  Returns a response dict (statusCode 200) whose JSON body is the `_source`
  of the first hit, or an empty list when the search reports no documents.
  """
  client = getESConn()
  utility.createOrUpdateGlobalConfig(client, config_db=config_db)
  time.sleep(0.3)  # short pause (0.3 sec) so the index is ready before it is searched

  found = client.search(index=config_db, body={"query": retrieve.MatchAll()})
  hits = found['hits']
  # the configuration index is expected to hold one document; only the first hit is used
  config = hits['hits'][0]['_source'] if hits['total']['value'] > 0 else []

  return {
    "statusCode": 200,
    "headers": headers,
    "body": json.dumps(config)
  }
Exemplo n.º 2
0
def get_config(event, context=None):
  """
  End-point: Retrieves configuration

  When the configuration index does not exist yet it is created first via
  utility.createOrUpdateGlobalConfig. Neither `event` nor `context` is used.

  Returns a response dict (statusCode 200) whose JSON body is the `_source`
  of the first hit, or an empty list when the search reports no documents.
  """
  client = getESConn()
  index_missing = not client.indices.exists(index=config_db)
  if index_missing:
    # first access: create the configuration index before reading from it
    utility.createOrUpdateGlobalConfig(client, config_db=config_db)
    time.sleep(0.3)  # short pause (0.3 sec) so the new index is ready to be searched

  match_all = {"query": retrieve.MatchAll()}
  hits = client.search(index=config_db, body=match_all)['hits']
  if hits['total']['value'] > 0:
    # the configuration index is expected to hold one document; use the first hit
    config = hits['hits'][0]['_source']
  else:
    config = []

  return {
    "statusCode": 200,
    "headers": headers,
    "body": json.dumps(config)
  }
Exemplo n.º 3
0
def all_projects(event, context=None):
  """
  End-point: Retrieves all projects. Each project is separate CBR application.

  Neither `event` nor `context` is used.

  Returns a response dict (statusCode 200) whose JSON body is a list of the
  project documents, each extended with its Elasticsearch document id under
  'id__'. The list is empty when the projects index does not exist.
  """
  # Elasticsearch returns only 10 hits unless 'size' is set, so without an
  # explicit size the list was silently cut off after ten projects.
  # 10000 is the default index.max_result_window; assumes the projects index
  # keeps that default -- TODO confirm (or switch to scroll/search_after).
  max_projects = 10000

  result = []
  # retrieve only if the ES index exists
  es = getESConn()
  if es.indices.exists(index=projects_db):
    query = {"query": retrieve.MatchAll(), "size": max_projects}

    res = es.search(index=projects_db, body=query)
    for hit in res['hits']['hits']:
      entry = hit['_source']
      entry['id__'] = hit['_id']  # expose the document id alongside the project fields
      result.append(entry)

  response = {
    "statusCode": 200,
    "headers": headers,
    "body": json.dumps(result)
  }
  return response
Exemplo n.º 4
0
def cbr_retrieve(event, context=None):
  """
  End-point: Completes the Retrieve step of the CBR cycle.

  Keys read from the JSON request body (event['body']):
    data      -- list of query features; each entry has 'name', 'similarity',
                 'unknown' and 'strategy', and optionally 'value' and 'weight'
    project   -- the project document; when absent it is looked up in the
                 projects index using 'projectId'
    topk      -- number of cases to retrieve
  A 'globalSim' key may be supplied but is ignored.

  Returns a response dict (statusCode 200) whose JSON body contains:
    recommended  -- the top-ranked case, overwritten with the known query values
                    and with the chosen reuse strategy for unknown attributes
    bestK        -- the retrieved cases, each with its score under 'score__'
    retrieveTime -- elapsed time measured by this function
    esTime       -- the 'took' value reported by Elasticsearch
  """
  start = timer()  # start timer
  result = {'recommended': {}, 'bestK': []}
  es = getESConn()  # es connection
  params = json.loads(event['body'])  # parameters in request body
  queryFeatures = params['data']
  proj = params.get('project')
  if proj is None:
    projId = params.get('projectId')  # id of the project document
    # NOTE(review): assumes the lookup returns the project document; what it
    # returns for an unknown projectId is not visible here.
    proj = utility.getByUniqueField(es, projects_db, "_id", projId)

  proj_attributes = proj['attributes']
  k = params['topk']
  query = {"query": {"bool": {"should": []}}}
  query["size"] = int(k)  # top k results
  should_clauses = query["query"]["bool"]["should"]

  for entry in queryFeatures:
    # only features with a value, a positive weight and a similarity measure contribute
    if ('value' in entry) and entry['value'] is not None and "" != entry['value'] and int(
            entry.get('weight', 0)) > 0 and entry['similarity'] != "None":
      field = entry['name']
      similarityType = entry['similarity']
      options = retrieve.get_attribute_by_name(proj_attributes, field).get('options', None)
      value = entry['value']
      # use lowercase when field is specified as case-insensitive
      # (non-string values have no lower() and are passed through unchanged)
      if similarityType == 'EqualIgnoreCase' and isinstance(value, str):
        value = value.lower()
      weight = entry['weight']

      should_clauses.append(retrieve.getQueryFunction(field, value, weight, similarityType, options))

  if not should_clauses:  # retrieve all (up to k) if no query was added
    should_clauses.append(retrieve.MatchAll())

  # perform retrieval
  res = es.search(index=proj['casebase'], body=query)
  for hit in res['hits']['hits']:
    entry = hit['_source']
    entry.pop('hash__', None)  # remove hash field and value
    entry = retrieve.remove_vector_fields(proj_attributes, entry)  # remove vectors from Semantic USE fields
    if not result['bestK']:  # first hit is the starting point of the recommendation
      result['recommended'] = copy.deepcopy(entry)
    entry['score__'] = hit['_score']  # removed during an update operation
    result['bestK'].append(entry)

  # Recommend: Get the recommended result using chosen reuse strategies for unknown attribute values and keep known attribute values supplied
  reuse_strategies = {
    "Maximum": max,
    "Minimum": min,
    "Mean": np.mean,
    "Median": np.median,
    "Mode": statistics.mode,
  }
  if result['bestK']:
    recommended = result['recommended']
    for entry in queryFeatures:
      name = entry['name']
      unknown = entry['unknown']
      if not unknown and ('value' in entry) and entry['value'] is not None and "" != entry['value']:
        recommended[name] = entry['value']  # copy known values
      if entry.get('similarity') is not None and unknown and entry['strategy'] != "Best Match":
        # use reuse strategies for unknown fields
        aggregate = reuse_strategies.get(entry['strategy'])
        if aggregate is None:
          continue  # unrecognised strategy: keep the best-match value
        # Cases that lack the attribute (or hold null) used to abort the whole
        # request with KeyError/TypeError; aggregate over the values that exist.
        observed = [case[name] for case in result['bestK'] if case.get(name) is not None]
        if observed:
          recommended[name] = aggregate(observed)

  end = timer()  # end timer
  result['retrieveTime'] = end - start
  result['esTime'] = res['took']
  response = {
    "statusCode": 200,
    "headers": headers,
    "body": json.dumps(result)
  }
  return response
Exemplo n.º 5
0
def cbr_retrieve(event, context=None):
    """
    End-point: Completes the Retrieve step of the CBR cycle.

    Keys read from the JSON request body (event['body']):
      data     -- list of query features; each entry has 'field', 'weight',
                  'similarityType', 'unknown' and 'strategy', and optionally
                  'value'
      project  -- the project document; its 'casebase' names the index searched
      topk     -- number of cases to retrieve
    A 'globalSim' key may be supplied but is ignored.

    Returns a response dict (statusCode 200) whose JSON body contains:
      recommended  -- the top-ranked case, overwritten with the known query
                      values and with the chosen reuse strategy for unknown
                      attributes
      bestK        -- the retrieved cases, each with its score under 'score__'
      retrieveTime -- elapsed time measured by this function
      esTime       -- the 'took' value reported by Elasticsearch
    """
    start = timer()  # start timer
    result = {'recommended': {}, 'bestK': []}
    params = json.loads(event['body'])  # parameters in request body
    queryFeatures = params['data']
    proj = params['project']
    k = params['topk']
    query = {'query': {'bool': {'should': []}}}
    query['size'] = int(k)  # top k results
    should_clauses = query['query']['bool']['should']

    for entry in queryFeatures:
        # only features with a value, a positive weight and a similarity
        # measure contribute to the query
        if ('value' in entry) and entry['value'] is not None \
                and "" != entry['value'] and int(entry['weight']) > 0 \
                and entry['similarityType'] != "None":
            qfnc = retrieve.getQueryFunction(entry['field'], entry['value'],
                                             entry['weight'],
                                             entry['similarityType'])
            should_clauses.append(qfnc)

    if not should_clauses:  # retrieve all (up to k) if no query was added
        should_clauses.append(retrieve.MatchAll())

    # perform retrieval
    es = getESConn()
    res = es.search(index=proj['casebase'], body=query)
    for hit in res['hits']['hits']:
        entry = hit['_source']
        entry.pop('hash__', None)  # remove hash field and value
        if not result['bestK']:  # first hit is the starting point of the recommendation
            result['recommended'] = copy.deepcopy(entry)
        entry['score__'] = hit['_score']  # removed during an update operation
        result['bestK'].append(entry)

    # Recommend: Get the recommended result using chosen reuse strategies for
    # unknown attribute values and keep known attribute values supplied
    reuse_strategies = {
        "Maximum": max,
        "Minimum": min,
        "Mean": np.mean,
        "Median": np.median,
        "Mode": statistics.mode,
    }
    if result['bestK']:
        recommended = result['recommended']
        for entry in queryFeatures:
            field = entry['field']
            unknown = entry['unknown']
            if not unknown and ('value' in entry) \
                    and entry['value'] is not None and "" != entry['value']:
                recommended[field] = entry['value']  # copy known values
            if entry['similarityType'] != "None" and unknown \
                    and entry['strategy'] != "Best Match":
                # use reuse strategies for unknown fields
                aggregate = reuse_strategies.get(entry['strategy'])
                if aggregate is None:
                    continue  # unrecognised strategy: keep the best-match value
                # Cases that lack the attribute (or hold null) used to abort
                # the whole request with KeyError/TypeError; aggregate over
                # the values that exist.
                observed = [case[field] for case in result['bestK']
                            if case.get(field) is not None]
                if observed:
                    recommended[field] = aggregate(observed)

    end = timer()  # end timer
    result['retrieveTime'] = end - start
    result['esTime'] = res['took']
    response = {
        "statusCode": 200,
        "headers": headers,
        "body": json.dumps(result)
    }
    return response