def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Fit a linear regression over the children's numeric (TIME, OPVAR)
    points and predict the operation variable at the alist's TIME.

    Args:
        alist: parent alist; its OPVAR is instantiated with the prediction
            and FNPLOT with the serialized model/data/prediction.
        children: child alists supplying the (time, value) data points.
        G: the inference graph (unused here; part of the reduce interface).

    Returns:
        The alist with OPVAR, FNPLOT and COV instantiated, or None when no
        numeric data points were found (fitting would otherwise raise).
    """
    X = []
    y = []
    data_pts = []
    for c in children:
        opVarValue = c.instantiation_value(c.get(tt.OPVAR))
        # only children with numeric (time, value) pairs contribute
        if utils.is_numeric(opVarValue) and utils.is_numeric(c.get(tt.TIME)):
            x_val = utils.get_number(c.get(tt.TIME), None)
            y_val = utils.get_number(opVarValue, None)
            X.append([x_val])
            y.append(y_val)
            data_pts.append([x_val, y_val])

    # guard: LinearRegression.fit raises on an empty design matrix
    if not X:
        return None

    X = np.array(X)
    y = np.array(y)
    reg = LinearRegression().fit(X, y)
    x_predict = utils.get_number(alist.get(tt.TIME), None)
    y_predict = reg.predict(np.array([[x_predict]]))[0]
    prediction = [x_predict, y_predict]
    # intercept first, then slope coefficients
    coeffs = [v for v in reg.coef_]
    coeffs.insert(0, reg.intercept_)
    # serialize the fitted model, data and prediction for plotting
    fnAndData = \
        """{{"function":{coeffs}, "data":{data_pts}, "prediction":{prediction}}}""".format(
            coeffs=coeffs, data_pts=data_pts, prediction=prediction)

    alist.instantiate_variable(alist.get(tt.OPVAR), y_predict)
    alist.set(tt.FNPLOT, fnAndData)

    alist.instantiate_variable(
        tt.COV,
        estimate_uncertainty(children, len(data_pts) == len(children),
                             alist.get(tt.OP), len(children)))
    return alist
def decompose(self, alist: A, G: InferenceGraph):
    """Decompose a comparison alist (eq/lt/gt/lte/gte over several operation
    variables) into one 'value' child per compared variable.

    Returns the new OR-branch operation alist, or None when the alist is not
    a multi-variable comparison.
    """
    comparison_ops = ['eq', 'lt', 'gt', 'lte', 'gte']
    opvar_tokens = alist.get(tt.OPVAR).split(' ')
    # only applies to comparison operations over multiple operation variables
    if alist.get(tt.OP).lower() not in comparison_ops or len(opvar_tokens) <= 1:
        return None

    op_alist = alist.copy()
    # higher cost makes this decomposition more expensive
    op_alist.cost = alist.cost + 1
    op_alist.branch_type = br.OR
    op_alist.parent_decomposition = 'comparison'
    op_alist.node_type = nt.HNODE
    G.link(alist, op_alist, op_alist.parent_decomposition)

    # one 'value' child per variable being compared
    for var_name in opvar_tokens:
        child = Alist()
        child.set(tt.OP, "value")
        child.set(tt.OPVAR, var_name)
        child.set(var_name, alist.get(var_name))
        child.cost = op_alist.cost + 1
        child.node_type = nt.ZNODE
        child.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
        G.link(op_alist, child, op_alist.parent_decomposition)

    return op_alist
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Aggregate children's OPVAR values: average when values are mostly
    numeric, otherwise the modal (most frequent) value.

    String values are split on the ';;' delimiter into multiple values.
    Returns the alist with OPVAR and COV instantiated, or None when no
    values were collected at all.
    """
    delimiter = ';;'
    total = 0.0
    numList = []       # numeric values collected across all children
    nonNumList = []    # string/raw values used for the modal fallback
    inst_vars = alist.instantiated_attributes().keys()
    for c in children:
        # copy child instantiations for attributes the parent shares
        # but has not instantiated itself (OP excluded)
        for k, v in c.instantiated_attributes().items():
            if k not in inst_vars and k in alist.attributes and k != tt.OP:
                c.instantiate_variable(k, v)

        opVarValue = c.get(c.get(tt.OPVAR))
        # a delimited string carries several values; anything else is single
        if isinstance(opVarValue, str):
            opVarValue = list(map(str, opVarValue.split(delimiter)))
        else:
            opVarValue = [opVarValue]
        for opval in opVarValue:
            if utils.is_numeric(opval):
                total += float(opval)
                numList.append(float(opval))
            # NOTE(review): reconstructed nesting — non-nesting values are
            # recorded as strings for the modal fallback; values prefixed
            # with vx.NESTING are kept raw. Confirm against original layout.
            if not str(opval).startswith(vx.NESTING):
                nonNumList.append(str(opval))
            else:
                nonNumList.append(opval)

    if numList or nonNumList:
        if len(numList) >= len(nonNumList):
            # mostly numeric: use the mean over all children
            opVar = alist.get(tt.OPVAR)
            valueToReturn = total / len(children)
            if opVar == alist.get(tt.TIME):
                # time values are reported as whole-year strings
                valueToReturn = str(int(valueToReturn))
            alist.instantiate_variable(opVar, valueToReturn)
        else:
            # mostly non-numeric: return the modal value(s), joined with
            # the delimiter when several values tie for the maximum count
            counts = dict(Counter(nonNumList))
            counts_set = set(counts.values())
            max_val = max(counts_set)
            items = [x for x, y in counts.items() if y == max_val]
            valueToReturn = f'{delimiter} '.join(map(str, set(items)))
            alist.instantiate_variable(alist.get(tt.OPVAR), valueToReturn)
    else:
        return None

    alist.instantiate_variable(
        tt.COV,
        estimate_uncertainty(children, len(numList) == len(children),
                             alist.get(tt.OP), len(children)))
    return alist
def find_propert_time(alist: Alist):
    """Find the recording year of the alist's OBJECT title by its SUBJECT
    artist on MusicBrainz; greedily returns at most one alist copy.

    NOTE(review): the name looks like a typo for find_property_time; kept
    as-is because callers elsewhere reference this exact name.
    """
    alist_arr = []
    results = find_recording(artist=alist.get(tt.SUBJECT),
                             title=alist.get(tt.OBJECT), date=None)
    # normalize each result's date to a 4-digit year string.
    # NOTE(review): despite the original "sort in reverse" comment, the
    # sort below is ascending — confirm which order is intended.
    FORMATS = ['%Y', '%Y-%m-%d']
    for r in results:
        date = ''
        for fmt in FORMATS:
            try:
                date = datetime.strptime(r['date'], fmt)
                r['date'] = date.strftime('%Y')
            except:
                # format mismatch (or missing date); try the next format
                pass
    results_sorted = [k for k in sorted(results, key=lambda x: x['date'])]
    for item in results_sorted:
        data_alist = alist.copy()
        data_alist.set(tt.TIME, item['date'])
        data_alist.data_sources = list(
            set(data_alist.data_sources + ['musicbrainz']))
        alist_arr.append(data_alist)
        break  # greedy; take only the first answer returned
    return alist_arr
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Join the children's OPVAR instantiations into one comma-separated
    string and instantiate the alist's operation variable with it."""
    opvar = alist.get(tt.OPVAR)
    joined_values = ','.join(
        str(child.instantiation_value(opvar)) for child in children)
    alist.instantiate_variable(opvar, joined_values)
    # TODO: port code for cov calculations
    alist.instantiate_variable(
        tt.COV,
        estimate_uncertainty(children, False, alist.get(tt.OP), len(children)))
    return alist
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Count children not appearing among this alist's variables and
    instantiate the operation variable with that count."""
    variables = alist.variables()
    var_names = list(variables.keys())
    var_values = list(variables.values())
    opvar = alist.get(tt.OPVAR)
    counted = []
    for child in children:
        # NOTE(review): membership compares child alists against variable
        # names/values, mirroring the original — verify this is intended.
        if child in var_names or child in var_values:
            continue
        counted.append(child.instantiation_value(opvar))
    alist.instantiate_variable(opvar, len(counted))
    alist.instantiate_variable(tt.COV, estimate_uncertainty(
        children, False, alist.get(tt.OP), len(children)
    ))
    return alist
def part_of_geopolitical_subject(alist: Alist):
    """For each geopolitical sub-element of the alist's OBJECT, emit an alist
    copy with that sub-element as SUBJECT, tagging 'wikidata' as a source."""
    geopolitical_type = alist.get(tt.PROPERTY).split(':')
    # the type of sub-element sought is the last ':'-separated token
    sub_elements = find_geopolitical_subelements(
        alist.get(tt.OBJECT), geopolitical_type[-1])
    results = []
    for element in sub_elements:
        fact_alist = alist.copy()
        fact_alist.data_sources = list(
            set(fact_alist.data_sources + ['wikidata']))
        fact_alist.set(tt.SUBJECT, element)
        results.append(fact_alist)
    return results
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Sum the children's numeric OPVAR values (non-numeric counts as 0)
    and instantiate the alist's operation variable with the total."""
    opvar = alist.get(tt.OPVAR)
    total = 0
    for child in children:
        # non-numeric instantiations default to 0
        total += utils.get_number(child.instantiation_value(opvar), 0)
    alist.instantiate_variable(opvar, total)
    # TODO: port code for cov calculations
    alist.instantiate_variable(
        tt.COV,
        estimate_uncertainty(children, True, alist.get(tt.OP), len(children)))
    return alist
def find_property_subject(alist: Alist):
    """Find artists who recorded the alist's OBJECT title at its TIME and
    return one alist copy per match with SUBJECT set to the artist."""
    matches = find_recording(artist=None,
                             title=alist.get(tt.OBJECT),
                             date=alist.get(tt.TIME))
    alist_arr = []
    for item in matches:
        data_alist = alist.copy()
        data_alist.set(tt.SUBJECT, item['artist'])
        data_alist.data_sources = list(
            set(data_alist.data_sources + ['musicbrainz']))
        alist_arr.append(data_alist)
    return alist_arr
def flush(alist: Alist, items) -> Alist:
    """Remove stale entries from the alist's query context.

    For each key in `items`, delete it from the third CONTEXT element (the
    query context) when its context value differs from the alist's own
    attribute value for that key.

    Args:
        alist: the alist whose CONTEXT is pruned in place.
        items: iterable of attribute keys to check.

    Returns:
        The same alist, with disagreeing query-context entries removed.
    """
    for k in items:
        try:
            # hoist the context lookup; the original re-fetched it per access
            context = alist.get(tt.CONTEXT)
            if k in context[2] and context[2][k] != alist.get(k):
                del context[2][k]
        except Exception:
            # best-effort: missing or malformed context is ignored
            pass
    return alist
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Instantiate the alist's operation variable with the minimum numeric
    OPVAR value among the children and propagate that child's projections."""
    opvar = alist.get(tt.OPVAR)
    # non-numeric values map to a huge sentinel so they never win the min
    values = {}
    for child in children:
        values[child] = utils.get_number(
            child.instantiation_value(opvar), 999999999999999)
    min_child = min(values, key=values.get)
    alist.instantiate_variable(opvar, values[min_child])
    propagate.projections(alist, (min_child,))
    alist.instantiate_variable(tt.COV, estimate_uncertainty(
        children, True, alist.get(tt.OP), len(children)
    ))
    return alist
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Compare the first operation variable against all the others with `<=`
    and store the lowercase boolean result in the '?_lte_' response variable.

    The result is "true" only when the first variable is numeric and less
    than or equal to every other (numeric) variable; any non-numeric value
    makes the result "false".
    """
    vars_to_compare = alist.get(tt.OPVAR).split(' ')
    # propagate projection vars to parent
    propagate.projections(alist, tuple(children))
    response_var = "?_lte_"
    if len(vars_to_compare) == 0:
        # defensive: str.split(' ') always yields at least one element,
        # so this branch is effectively unreachable
        alist.set(response_var, "false")
        return alist

    result = True
    if len(vars_to_compare) > 1 and utils.is_numeric(
            alist.instantiation_value(vars_to_compare[0])):
        base = utils.get_number(
            alist.instantiation_value(vars_to_compare[0]), 0)
        for x in vars_to_compare[1:]:
            # fix: the original tested is_numeric on the same value twice
            # in one condition; a single check is sufficient
            if utils.is_numeric(alist.instantiation_value(x)):
                result = (base <= utils.get_number(
                    alist.instantiation_value(x), 0)) and result
            else:
                result = False
                break
    else:
        result = False

    alist.set(response_var, str(result).lower())
    return alist
def get_map_strategy(self, alist: Alist):
    """
    Get decomposition rules to apply to an alist.

    Args
    ----
    alist : Alist

    Return
    ------
    ops : list of (mapper_fn, name) tuples for decomposing the alist
    """
    # TODO: learn to predict best strategy given path of root from
    # node and attributes in alist
    self.last_heartbeat = time.time()

    # comparison operations always use the dedicated comparison mapper
    if alist.get(tt.OP).lower() in ['eq', 'lt', 'gt', 'lte', 'gte']:
        return [(frank.map.map_wrapper.get_mapper_fn("comparison"),
                 "comparison")]
    # if compound frame (i.e nesting point in frame), then normalize
    if alist.uninstantiated_nesting_variables():
        return [(frank.map.map_wrapper.get_mapper_fn("normalize"),
                 "normalize")]

    # otherwise gather every configured base decomposition, in random order
    ops = []
    for allowed_op in config.config["base_decompositions"]:
        try:
            mapper = frank.map.map_wrapper.get_mapper_fn(allowed_op)
        except Exception as ex:
            print("Error in decomposition mapper: " + str(ex))
        else:
            ops.append((mapper, allowed_op))
    random.shuffle(ops)
    return ops
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Predict the operation variable at the alist's TIME using Gaussian
    process regression over the children's numeric (TIME, OPVAR) points.

    Returns the alist with OPVAR and COV instantiated, or None when the
    target time is missing/zero, the GP fails, or instantiation errors.
    """
    allNumeric = True
    y_predict = None
    X = []         # regression inputs: one [time] row per numeric child
    y = []         # regression targets: the children's numeric OPVAR values
    data_pts = []
    for c in children:
        opVarValue = c.instantiation_value(c.get(tt.OPVAR))
        if utils.is_numeric(opVarValue) and utils.is_numeric(c.get(tt.TIME)):
            x_val = utils.get_number(c.get(tt.TIME), None)
            y_val = utils.get_number(opVarValue, None)
            X.append([x_val])
            y.append(y_val)
            data_pts.append([x_val, y_val])
        else:
            # any non-numeric child lowers confidence via allNumeric
            allNumeric = False
    X = np.array(X)
    y = np.array(y)
    x_to_predict = utils.get_number(alist.get(tt.TIME), None)
    # NOTE(review): falsy check also rejects a literal time of 0 — confirm
    if not x_to_predict:
        return None
    else:
        x_to_predict = np.array([x_to_predict])
    # signal-variance heuristic: squared range of the observed y values
    gp_prediction = do_gpregress(X, y, x_to_predict,
                                 (np.max(y) - np.min(y))**2, 1)
    if gp_prediction is None:
        return None

    y_predict = gp_prediction[0]['y']
    try:
        prediction = [x_to_predict, y_predict]
        alist.instantiate_variable(alist.get(tt.OPVAR), y_predict)
        # NOTE(review): COV is instantiated twice — the estimate_uncertainty
        # call below follows this stdev-based value; confirm which should win.
        alist.instantiate_variable(tt.COV,
                                   gp_prediction[0]['stdev'] / y_predict)
        alist.instantiate_variable(
            tt.COV,
            estimate_uncertainty(children, allNumeric, alist.get(tt.OP),
                                 len(children)))
    except Exception as ex:
        print(ex)
        return None
    return alist
def part_of_relation_subject(alist: Alist):
    """Return alist copies whose SUBJECT is each location found to be part of
    the alist's OBJECT, tagging 'wikidata' as a data source."""
    results = []
    subjects = find_relation_subject(alist.get(tt.OBJECT), "location")
    for subject in subjects:
        factAlist = alist.copy()
        factAlist.data_sources.add('wikidata')
        factAlist.set(tt.SUBJECT, subject)
        results.append(factAlist)
    return results
def find_property_subject(alist: Alist):
    """Query Wikidata for subjects related to the alist's OBJECT entity
    through its PROPERTY, optionally with the year of the relation.

    Returns:
        A list of alist copies with SUBJECT (and TIME, when a year is
        available) set, or an empty list when the entity is unknown or the
        query fails.
    """
    entity_id = find_entity(alist.instantiation_value(tt.OBJECT),
                            alist.get(tt.PROPERTY))
    if not entity_id:
        return []

    # compose wikidata query
    query = ""
    if alist.get(tt.TIME):
        # fix: the WHERE block was closed with two single '}' characters,
        # which makes str.format raise ValueError ("Single '}' encountered");
        # braces in the template must be escaped as '}}'.
        query = """
            SELECT DISTINCT ?sLabel (YEAR(?date) as ?year)
            WHERE{{
                ?s wdt:{property_id} wd:{entity_id}.
                OPTIONAL {{wd:{entity_id} pq:P585 ?date .}}
                SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en".}}
            }}
            """.format(entity_id=entity_id,
                       property_id=alist.get(tt.PROPERTY))
    else:
        query = """
            SELECT DISTINCT ?s ?sLabel
            WHERE {{
                OPTIONAL {{ ?s wdt:{property_id} wd:{entity_id} . }}
                OPTIONAL {{ wd:{entity_id} wdt:{property_id} ?s . }} # hack to find inverse triple
                SERVICE wikibase:label {{ bd:serviceParam wikibase:language "en".}}
            }}
            """.format(entity_id=entity_id,
                       property_id=alist.get(tt.PROPERTY))

    params = {'format': 'json', 'query': query}
    response = requests.get(url='https://query.wikidata.org/sparql',
                            params=params)
    alist_arr = []
    try:
        data = response.json()
        for d in data['results']['bindings']:
            data_alist = alist.copy()
            data_alist.set(tt.SUBJECT, d['sLabel']['value'])
            if 'year' in d:
                data_alist.set(tt.TIME, d['year']['value'])
            data_alist.data_sources = list(
                set(data_alist.data_sources + ['wikidata']))
            alist_arr.append(data_alist)
    except Exception as e:
        print("wikidata query response error: " + str(e))
    return alist_arr
def part_of_relation_object(alist: Alist):
    """Return alist copies whose OBJECT is each location that the alist's
    SUBJECT is part of, tagging 'wikidata' as a data source."""
    objects_found = _part_of_relation_object(alist.get(tt.SUBJECT), "location")
    results = []
    for obj in objects_found:
        fact_alist = alist.copy()
        fact_alist.data_sources = list(
            set(fact_alist.data_sources + ['wikidata']))
        fact_alist.set(tt.OBJECT, obj)
        results.append(fact_alist)
    return results
def find_property_values(alist: Alist, search_element: str):
    """Dispatch a property lookup for the requested element of the alist.

    Only OBJECT searches are currently implemented; SUBJECT and TIME fall
    through to None. Returns {} when the alist has no PROPERTY.
    """
    if not alist.get(tt.PROPERTY):
        return {}
    if search_element == tt.OBJECT:
        return find_property_object(alist)
    if search_element in (tt.SUBJECT, tt.TIME):
        # not implemented yet; None preserved for callers
        return None
def _get_context(alist: Alist, idx, key):
    """Return the value stored under `key` in context element `idx` of the
    alist's CONTEXT, or None when the context is missing or malformed."""
    if not alist.get(tt.CONTEXT):
        return None
    try:
        context_section = alist.attributes[tt.CONTEXT][idx]
        return context_section[key] if key in context_section else None
    except:
        # any lookup failure (bad index, non-dict section) yields None
        return None
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Average the children's numeric OPVAR values and instantiate the
    alist's operation variable with the mean.

    Each child's instantiated attributes that also exist on the parent are
    copied over. Returns None when there are no children (the original
    divided by len(children) and would raise ZeroDivisionError).
    """
    if not children:
        return None

    total = 0.0  # renamed from `sum` to avoid shadowing the builtin
    allNumeric = True
    for c in children:
        for k, v in c.instantiated_attributes().items():
            if k in alist.attributes:
                alist.instantiate_variable(k, v)
        opVarValue = c.get(c.get(tt.OPVAR))
        if utils.is_numeric(opVarValue):
            total += float(opVarValue)
        else:
            # non-numeric children are excluded from the sum but still
            # counted in the divisor, matching the original behavior
            allNumeric = False
    alist.instantiate_variable(alist.get(tt.OPVAR), total / len(children))
    alist.instantiate_variable(
        tt.COV,
        estimate_uncertainty(children, allNumeric, alist.get(tt.OP),
                             len(children)))
    return alist
def find_property_values(alist: Alist, search_element: str):
    """Dispatch a MusicBrainz lookup for the requested alist element when
    the property is a supported 'recording' verb; otherwise return None."""
    supported_verbs = ['sing', 'sang', 'sung', 'recorded', 'performed']
    prop = alist.get(tt.PROPERTY)
    if not prop or prop not in supported_verbs:
        return None
    dispatch = {
        tt.SUBJECT: find_property_subject,
        tt.OBJECT: find_property_object,
        tt.TIME: find_propert_time,
    }
    handler = dispatch.get(search_element)
    return handler(alist) if handler else None
def inject_retrieval_context(alist: Alist, source) -> Alist:
    """
    Inject context values into alist attributes to be used for
    Information Retrieval from KBs.

    Args:
        alist: the alist whose attributes are overridden from its CONTEXT.
        source: the KB name; selects source-specific context values when a
            context entry is a per-source dict.

    Returns:
        The same alist with applicable context values injected.
    """
    # fix: the context was fetched twice and re-tested after the early
    # return; a single fetch and merge suffices
    context = alist.get(tt.CONTEXT)
    if not context:
        return alist
    # merge the user, environment and query context sections; later
    # sections override earlier ones on key collisions
    context_store = {**context[0], **context[1], **context[2]}
    for a in alist.attributes.keys():
        if a in context_store:
            if type(context_store[a]) is dict and source in context_store[a]:
                # per-source value, e.g. {'wikidata': ...}
                alist.set(a, context_store[a][source])
            elif type(context_store[a]) is not dict:
                alist.set(a, context_store[a])
    return alist
def decompose(self, alist: A, G: InferenceGraph):
    """Geospatial decomposition: split the alist's SUBJECT into its
    sub-locations (via GeoNames) and aggregate the children with 'sum'.

    Returns the new AND-branch 'sum' operation alist, or None when the
    subject is empty/a variable or has no sub-locations.
    """
    # check if subject is empty or is a variable
    if not alist.get(tt.SUBJECT) \
            or alist.get(tt.SUBJECT).startswith(vx.PROJECTION) \
            or alist.get(tt.SUBJECT).startswith(vx.AUXILLIARY):
        return None
    # get the sub locations of the subject
    # TODO: perform geospatial decomp on OBJECT attribute
    sub_items = sparqlEndpoint.find_sub_location(
        alist.get(tt.SUBJECT).strip())
    if not sub_items:
        return None
    # tag the source before copying so every derived alist inherits it
    alist.data_sources.add('geonames')
    op_alist = alist.copy()
    op_alist.set(tt.OP, 'sum')
    # higher cost makes this decomposition more expensive
    op_alist.cost = alist.cost + 4
    op_alist.branch_type = br.AND
    op_alist.parent_decomposition = 'geospatial'
    op_alist.node_type = nt.HNODE
    G.link(alist, op_alist, op_alist.parent_decomposition)

    # one 'value' child per sub-location
    for s in sub_items:
        child = alist.copy()
        child.set(tt.SUBJECT, s)
        child.set(tt.OP, 'value')
        child.cost = op_alist.cost + 1
        child.node_type = nt.ZNODE
        child.set(tt.CONTEXT, op_alist.get(tt.CONTEXT))
        G.link(op_alist, child, op_alist.parent_decomposition)

    return op_alist
def test_instantiateVariables(self):
    """Instantiating '#d' must propagate to '?x', which references '#d'."""
    attrs = {
        tt.ID: '1',
        tt.SUBJECT: 'Africa',
        tt.PROPERTY: 'P1082',
        tt.OBJECT: '?x',
        tt.TIME: '2010',
        tt.OPVAR: '?x',
        tt.COST: 1,
    }
    alist = Alist(**attrs)
    alist.set('#d', '')
    alist.set('?x', '#d')
    alist.instantiate_variable('#d', 99)
    self.assertEqual(alist.get('?x'), 99, "OBJECT should be 99.")
def find_property_values(self, alist: Alist, search_element: str):
    """Look up the alist's PROPERTY on nodes matching its SUBJECT and return
    alist copies with OBJECT set; only OBJECT searches are supported."""
    if search_element != tt.OBJECT:
        return None
    subject = alist.instantiation_value(tt.SUBJECT)
    results = []
    for node in self._get_nodes(subject):
        try:
            data_alist = alist.copy()
            data_alist.set(tt.OBJECT, node[alist.get(tt.PROPERTY)])
            data_alist.data_sources = list(
                set(data_alist.data_sources + [self.name]))
            results.append(data_alist)
        except:
            # node lacks the requested property; skip it
            pass
    return results
def why(self, G: InferenceGraph, alist: Alist, decomp_op, in_place=True):
    '''
    Explain a decomposition of this alist. Assumes a failed instantiation
    of this alist following KB searches.

    Builds a human-readable explanation string for the given decomposition
    operation ('temporal', 'geospatial', 'normalize' or 'comparison') and,
    when in_place is True, stores it under the "why" attribute and writes
    the alist back to the graph.
    '''
    expl = ""
    time = ""
    children = G.child_alists(alist.id)
    # optional " in <year>" suffix used by all explanation templates
    if alist.get(tt.TIME):
        time = f" in {alist.get(tt.TIME)}"
    if decomp_op == 'temporal':
        expl = f"Could not find the {alist.get(tt.PROPERTY)} of {alist.instantiation_value(tt.SUBJECT)}{time}. "
        decomp_items = []
        # for c in alist.children[0].children:
        for c in children:
            decomp_items.append(c.get(tt.TIME))
        # only mention the time range when at least two times were tried
        if len(decomp_items) >= 2:
            expl += f"Attempted to infer the required value{time} by finding the {alist.get(tt.PROPERTY)} of {alist.instantiation_value(tt.SUBJECT)} " + \
                f"at other times between {min(decomp_items)} and {max(decomp_items)}."
    elif decomp_op == 'geospatial':
        expl = f"Could not find the {alist.get(tt.PROPERTY)} of {alist.instantiation_value(tt.SUBJECT)}{time}. "
        decomp_items = []
        # for c in alist.children[0].children:
        # grandchildren of the alist carry the constituent sub-locations
        for c in G.child_alists(children[0].id):
            decomp_items.append(c.instantiation_value(tt.SUBJECT))
        entities = ''
        # abbreviate long lists; otherwise join all but last with "and"
        if len(decomp_items) > 8:
            entities = f"{', '.join(decomp_items[0:8])} etc"
        else:
            entities = f"{', '.join(decomp_items[0:len(decomp_items)-1])} and {decomp_items[-1]}"
        if decomp_items:
            expl += f"Finding the {alist.get(tt.PROPERTY)}{time} for the constituent parts of " + \
                f" {alist.instantiation_value(tt.SUBJECT)}: {entities}."
    elif decomp_op == 'normalize':
        expl = f"Need to solve the sub-queries before determining the {alist.get(tt.PROPERTY)}{time}."
    elif decomp_op == 'comparison':
        expl = f"Need to solve the sub-queries to determine the items to compare."
    if in_place:
        alist.set("why", expl)
        G.add_alist(alist)
def test_add_context(self):
    """A CONTEXT list set on an alist must round-trip unchanged via get()."""
    a = Alist(**{
        tt.ID: '1',
        tt.SUBJECT: 'Africa',
        tt.PROPERTY: 'P1082',
        tt.OBJECT: '',
        tt.TIME: '2010',
        tt.OPVAR: '?x',
        tt.COST: 1,
    })
    user_ctx = {ctx.nationality: 'United Kingdom'}
    env_ctx = {
        ctx.place: 'United Kingdom',
        ctx.device: 'phone',
        ctx.datetime: '2020-04-30 12:00:00',
    }
    ctx1 = [user_ctx, env_ctx, {}]
    a.set(tt.CONTEXT, ctx1)
    ctx2 = a.get(tt.CONTEXT)
    # also check the attributes stay JSON-serializable with context attached
    alistJson = json.loads(json.dumps(a.attributes))
    self.assertEqual(ctx1, ctx2, "Context values do not match")
def test_inject_context_query(self):
    """Injecting query context should derive a 'low' accuracy flag."""
    # query context
    attrs = {
        tt.ID: '1',
        tt.SUBJECT: 'Ghana',
        tt.PROPERTY: 'P1082',
        tt.OBJECT: '',
        tt.OPVAR: '?x',
        tt.COST: 1,
    }
    a = Alist(**attrs)
    ctx1 = [
        {ctx.nationality: 'United Kingdom'},
        {
            ctx.place: 'United Kingdom',
            ctx.device: 'phone',
            ctx.datetime: '2010-04-30 12:00:00',
        },
        {},
    ]
    a.set(tt.CONTEXT, ctx1)
    alist = frank.context.inject_query_context(a)
    ctx2 = a.get(tt.CONTEXT)
    self.assertEqual(ctx2[0][ctx.accuracy], 'low')
def reduce(alist: Alist, children: List[Alist], G: InferenceGraph):
    """Instantiate the alist's operation variable with the modal (most
    frequent) non-nested OPVAR value among the children.

    Each child's instantiated attributes that also exist on the parent are
    copied over. Returns None when no candidate values exist (the original
    raised ValueError on max() over an empty sequence).
    """
    nonNumList = []
    for c in children:
        for k, v in c.instantiated_attributes().items():
            if k in alist.attributes:
                alist.instantiate_variable(k, v)
        # fix: the original computed this value, then discarded it and
        # re-evaluated c.get(c.get(tt.OPVAR)) twice more
        opVarValue = c.get(c.get(tt.OPVAR))
        # skip nested-alist references; only concrete values can be modal
        if not opVarValue.startswith(vx.NESTING):
            nonNumList.append(opVarValue)

    # guard: no candidate values -> no mode can be computed
    if not nonNumList:
        return None

    # get modal value
    valueToReturn = max(nonNumList, key=nonNumList.count)
    alist.instantiate_variable(alist.get(tt.OPVAR), valueToReturn)
    # todo: propagate projection variables of modal alist to parent
    alist.instantiate_variable(tt.COV, estimate_uncertainty(
        children, len(nonNumList) > 0, 'value', len(children)
    ))
    return alist
def find_property_object(alist: Alist):
    """Query the World Bank API for the alist's PROPERTY indicator of the
    SUBJECT country at the alist's TIME; returns one alist copy per data
    point with OBJECT set, or an empty list on any failure."""
    results = []
    subj_instantiation = alist.instantiation_value(tt.SUBJECT)
    if isinstance(subj_instantiation, str):
        # worldbank country lookup expects spaces, not underscores
        country_id = getCountryPropertyDb(subj_instantiation.replace("_", " "),
                                          "id")
    else:
        return results
    if not country_id:
        return results
    try:
        params = {
            # strip a trailing '.0' from numeric years, e.g. '2010.0' -> '2010'
            'date': str(alist.get(tt.TIME)).replace(".0", ""),
            'format': 'json',
            'per_page': 1000
        }
        response = requests.get(
            url=
            f'http://api.worldbank.org/v2/countries/{country_id}/indicators/{alist.get(tt.PROPERTY)}',
            params=params)
        try:
            data = response.json()
            # worldbank responses: data[0] is paging metadata, data[1] holds
            # the observations — TODO confirm against the API contract
            if len(data) > 1 and data[1]:
                for d in data[1]:
                    if d['value']:
                        data_alist = alist.copy()
                        data_alist.set(tt.OBJECT, d['value'])
                        data_alist.data_sources = list(
                            set(data_alist.data_sources + ['worldbank']))
                        results.append(data_alist)
        except Exception as ex:
            print("worldbank query response error: " + str(ex))
    except Exception as ex:
        print("worldbank query error: " + str(ex))
    return results