def find_var_opts_for_rules(self, goal_phrase, l_cat_names, l_rule_names, idb, var_obj_parent, calc_level):
    """Collect one var-opts object for every rule found for goal_phrase.

    goal_phrase is turned into its rphrase and that rphrase's perms. The
    category and rule names are translated to ids through self.__d_rcats
    and self.__d_rnames; names missing from those tables are skipped.
    bitvecdb.find_matching_rules_vo() is then called with those ids, and
    find_var_opts() is invoked for each rule it reports.

    Returns:
        A list holding the find_var_opts() result for each rule found,
        in the order bitvecdb reported them.
    """
    print('find_rules_matching_result rules for', goal_phrase)
    goal_rphrase = self.__phrase_mgr.get_rphrase(goal_phrase)
    l_rperms = self.__phraseperms.get_perms(goal_rphrase)

    # Output buffers are sized for (active rules) x (source perms) entries.
    max_hits = len(self.__l_active_rules) * len(l_rperms)
    irule_arr = bitvecdb.intArray(max_hits)
    rperms_ret_arr = bitvecdb.intArray(max_hits)
    num_vars_ret_arr = bitvecdb.intArray(max_hits)
    rperms_arr = utils.convert_intvec_to_arr(l_rperms)

    l_rcats = []
    for cat_name in l_cat_names:
        cid = self.__d_rcats.get(cat_name, -1)
        if cid >= 0:
            l_rcats.append(cid)
    num_cats = len(l_rcats)
    # With no known category the plain integer 0 is handed to bitvecdb
    # instead of an array.
    cat_arr = utils.convert_intvec_to_arr(l_rcats) if num_cats > 0 else 0

    l_rids = []
    for rule_name in l_rule_names:
        rid = self.__d_rnames.get(rule_name, -1)
        if rid >= 0:
            l_rids.append(rid)
    num_rids = len(l_rids)
    # The rule-id array is built even when it is empty.
    # NOTE(review): the flattened source reads "# if num_rids > 0: rid_arr = ...";
    # this assumes only the guard was commented out - confirm against the original file.
    rid_arr = utils.convert_intvec_to_arr(l_rids)

    num_rules_found = bitvecdb.find_matching_rules_vo(
        self.__hcdb_rules, self.__phraseperms.get_bdb_all_hcdb(),
        self.__el_bitvec_mgr.get_hcbdb(), irule_arr, num_vars_ret_arr,
        rperms_ret_arr, len(l_rperms), rperms_arr,
        num_cats, cat_arr, num_rids, rid_arr, False, -1)
    print('num_rules_found', num_rules_found)

    return [
        self.find_var_opts(
            idb, irule_arr[ifound], num_vars_ret_arr[ifound],
            rperms_ret_arr[ifound], var_obj_parent, calc_level)
        for ifound in range(num_rules_found)
    ]
def get_cluster(self, l_phrase_bits):
    """Return the record ids that bitvecdb.get_thresh_recs() reports for a phrase.

    Args:
        l_phrase_bits: flat sequence of the phrase's bits; its length is
            divided by self.__bitvec_size to obtain the phrase length.

    Returns:
        A list with the first num_ret integers written into the result
        buffer by bitvecdb.get_thresh_recs().
    """
    # Floor division keeps plen an integer under Python 3 as well; true
    # division would hand a float to the C wrapper.
    plen = len(l_phrase_bits) // self.__bitvec_size
    # Each cent is an array of el bitvecs, each cent is also an array of hd, one for each el
    num_recs = len(self.__l_cent_hd)
    ret_arr = bitvecdb.intArray(num_recs)
    # A zero-length array is passed in the hd-threshold argument slot.
    null_arr = bitvecdb.intArray(0)
    num_ret = bitvecdb.get_thresh_recs(
        self.__hcdb_cent, ret_arr, plen, null_arr,
        utils.convert_charvec_to_arr(l_phrase_bits), False, True)
    return [ret_arr[i] for i in range(num_ret)]
def test_rule(self, mpdbs, stmt, l_results, idb, l_rule_cats):
    """Run every rule that matches stmt and record whether any of them matched.

    Args:
        mpdbs: passed through unchanged to run_one_rule().
        stmt: statement text; split with utils.full_split() to form the phrase.
        l_results: expected results; when non-empty, the first entry (minus
            its leading item) is rendered as a lower-case string of words
            used only for the comparison printout.
        idb: passed through unchanged to run_one_rule().
        l_rule_cats: category names, translated with self.get_cid().

    Side effects:
        Increments self.__test_stat_num_rules_found when at least one rule
        reported one or more matches, otherwise
        self.__test_stat_num_rules_not_found. Prints progress to stdout.
    """
    phrase = utils.full_split(stmt)
    result_words = ''
    # Truthiness also covers None and empty non-list sequences.
    if l_results:
        result_words = ' '.join(
            utils.full_split(utils.convert_phrase_to_word_list(l_results[0][1:]))).lower()
    print('Testing rules for', phrase)
    rphrase = self.__phrase_mgr.get_rphrase(phrase)
    l_rperms = self.__phraseperms.get_perms(rphrase)
    # The maximum theoretical returns is the num of rules * the number of source perms
    num_poss_ret = len(self.__l_active_rules) * len(l_rperms)
    irule_arr = bitvecdb.intArray(num_poss_ret)
    rperms_ret_arr = bitvecdb.intArray(num_poss_ret)
    rperms_arr = utils.convert_intvec_to_arr(l_rperms)
    l_rule_cids = [self.get_cid(rule_cat) for rule_cat in l_rule_cats]
    num_vars_ret_arr = bitvecdb.intArray(num_poss_ret)
    num_rules_matched = 0
    # No rule-name filter is supplied here: a count of 0 with an empty array.
    num_rules_found = bitvecdb.find_matching_rules_vo(
        self.__hcdb_rules, self.__phraseperms.get_bdb_all_hcdb(),
        self.__el_bitvec_mgr.get_hcbdb(), irule_arr, num_vars_ret_arr,
        rperms_ret_arr, len(l_rperms), rperms_arr,
        len(l_rule_cids), utils.convert_intvec_to_arr(l_rule_cids),
        0, utils.convert_intvec_to_arr([]), False, 0)
    print('num_rules_found', num_rules_found)
    for iret in range(num_rules_found):
        irule = irule_arr[iret]
        bext, iactive = self.__l_active_rules[irule]
        rperm_ret = rperms_ret_arr[iret]
        num_vars_ret = num_vars_ret_arr[iret]
        if not bext:
            # Learned rules go through the same path as the other rules below.
            print('Will run learned rule', iactive, 'as standard rule', irule)
        print('should run rule called', self.__l_names[irule])
        b_has_result, num_matched, ll_result_eids = self.run_one_rule(
            irule, rperm_ret, result_words, mpdbs, idb, num_vars_ret)
        if num_matched < 1:
            print('rule', irule, 'did not match the state of the story db for idb', idb)
            continue
        # run_one_rule() can report matches while returning no result
        # phrases (e.g. when the rule has no result); indexing [0]
        # unconditionally would raise IndexError in that case.
        if ll_result_eids:
            produced = ' '.join(
                [self.__el_bitvec_mgr.get_el_by_eid(el) for el in ll_result_eids[0]])
        else:
            produced = ''
        print('test rule produced: ', produced)
        print('test expected result:', result_words)
        num_rules_matched += 1
    if num_rules_matched > 0:
        self.__test_stat_num_rules_found += 1
    else:
        self.__test_stat_num_rules_not_found += 1
def init_db_for_cluster(self, cluster_min):
    """Configure the hd buckets and the cluster minimum on this object's bitvecdb handle.

    Four bucket limits are registered: 0, one fifth of bitvec_size,
    two fifths of bitvec_size and bitvec_size itself.

    Args:
        cluster_min: value forwarded to bitvecdb.set_cluster_min().
    """
    # Floor division keeps the limits integral under Python 3; they are
    # stored in an intArray, which cannot take the floats that true
    # division would produce.
    l_buckets = [0, bitvec_size // 5, 2 * bitvec_size // 5, bitvec_size]
    buckets_arr = bitvecdb.intArray(len(l_buckets))
    for ib, bhd in enumerate(l_buckets):
        buckets_arr[ib] = bhd
    bitvecdb.set_hd_buckets(self.get_hcbdb(), len(l_buckets), buckets_arr)
    bitvecdb.set_cluster_min(self.get_hcbdb(), cluster_min)
def get_closest_recs(self, k, phrase_eids, iskip, shrink=0):
    """Query bitvecdb.get_closest_recs() for up to k records near a phrase.

    The query data is the concatenation of the bit lists returned by
    self.__el_nlb_mgr.get_bin_by_id() for each id in phrase_eids.

    Returns:
        A 3-tuple (record values, hd values, bits) where the first two are
        lists of length num_ret read from the int output buffers
        (the hd values are presumably Hamming distances - confirm against
        the C library) and the third is an int8 ndarray of shape
        (num_ret, bitvec_size) built from the char output buffer.
    """
    ret_arr = bitvecdb.intArray(k)
    hds_arr = bitvecdb.intArray(k)
    obits_arr = bitvecdb.charArray(k * bitvec_size)

    qdata = []
    for iel in phrase_eids:
        qdata.extend(self.__el_nlb_mgr.get_bin_by_id(iel))

    num_ret = bitvecdb.get_closest_recs(
        self.__hcbdb, k, ret_arr, hds_arr, obits_arr, len(phrase_eids),
        utils.convert_charvec_to_arr(qdata), iskip, shrink)

    found = range(num_ret)
    l_idexs_ret = [ret_arr[ir] for ir in found]
    l_hds_arr = [hds_arr[ir] for ir in found]
    # Each char of the output buffer is converted to its ordinal value.
    flat_bits = [ord(obits_arr[ib]) for ib in range(num_ret * bitvec_size)]
    nd_obits = np.reshape(np.array(flat_bits, dtype=np.int8), (num_ret, bitvec_size))
    return l_idexs_ret, l_hds_arr, nd_obits
def cluster_one_thresh(self, plen, recc_thresh):
    """Pull cluster seeds from self.__bdb_all for one threshold and score the outcome.

    get_cluster_seed() is called repeatedly while at least c_cluster_min
    records remain; the loop also stops as soon as a call removes no
    records. Every successful call contributes one cluster.

    Returns:
        (final_score, nd_centroids, l_hd_thresh) where nd_centroids is a
        uint8 array with one row of self.__bitvec_size*plen values per
        cluster and l_hd_thresh holds each cluster's plen hd values.
        final_score is 1000.0 when no cluster (or no hit) was produced;
        otherwise it is the cluster count times the hit-weighted mean of
        the cluster scores, plus 0.2 for each record left over.
    """
    cent_len = self.__bitvec_size * plen
    num_left = self.__bdb_all.init_num_left_buf(plen)
    cent_ret = bitvecdb.charArray(cent_len)
    hd_avg_ret = bitvecdb.floatArray(1)
    hd_thresh = bitvecdb.intArray(plen)

    l_clusters = []
    while num_left >= c_cluster_min:
        num_left_now = self.__bdb_all.get_cluster_seed(cent_ret, hd_avg_ret, hd_thresh, plen, recc_thresh)
        num_added = num_left - num_left_now
        print('py: cluster_one_thresh num_added is', num_added)
        num_left = num_left_now
        if num_added == 0:
            break
        # Copy the output buffers; they are overwritten by the next seed call.
        centroid_bits = [ord(cent_ret[ib]) for ib in range(cent_len)]
        el_hds = [hd_thresh[iel] for iel in range(plen)]
        l_clusters.append(nt_cluster(l_cent=centroid_bits, hd=el_hds,
                                     score=hd_avg_ret[0], num_hit=num_added))

    num_clusters = len(l_clusters)
    nd_hd_cluster = np.zeros(num_clusters)
    nd_num = np.zeros(num_clusters)
    nd_centroids = np.zeros((num_clusters, cent_len), dtype=np.uint8)
    l_hd_thresh = []
    for icluster, cluster in enumerate(l_clusters):
        nd_hd_cluster[icluster] = cluster.score
        nd_num[icluster] = cluster.num_hit
        nd_centroids[icluster, :] = np.array(cluster.l_cent, dtype=np.uint8)
        l_hd_thresh.append(cluster.hd)

    if l_clusters == [] or np.sum(nd_num) == 0:
        return 1000.0, nd_centroids, l_hd_thresh

    score = np.sum(np.multiply(nd_hd_cluster, nd_num)) / np.sum(nd_num)
    # think about the magic number and put it into a constant
    final_score = (nd_hd_cluster.shape[0] * score) + (num_left * 0.2)
    return final_score, nd_centroids, l_hd_thresh
def get_el_hd_recs(self, pos, hd, el, num_cands, cands_arr):
    """Call bitvecdb.get_el_hd_recs_by_list() for element el over a candidate list.

    Args:
        pos, hd: forwarded unchanged to the bitvecdb call.
        el: element whose bits are looked up via self.__el_nlb_mgr.get_el_bin().
        num_cands: number of entries in cands_arr; also sizes the output buffer.
        cands_arr: candidate array handed to bitvecdb.

    Returns:
        (num_ret, irec_arr): the count returned by bitvecdb and the output
        intArray it filled.
    """
    el_bits_arr = utils.convert_charvec_to_arr(self.__el_nlb_mgr.get_el_bin(el))
    irec_arr = bitvecdb.intArray(num_cands)
    num_ret = bitvecdb.get_el_hd_recs_by_list(
        self.__hcbdb, irec_arr, cands_arr, num_cands, pos, hd, el_bits_arr)
    return num_ret, irec_arr
def get_close_recs(self, idb, plen, hd_thresh, l_qbits):
    """Run bitvecdb.get_thresh_recs_by_list() over the records get_plen_irecs() returns.

    Args:
        idb, plen: used to obtain the candidate records via get_plen_irecs().
        hd_thresh: sequence of ints converted to an int array for bitvecdb.
        l_qbits: query bits converted to a char array for bitvecdb.

    Returns:
        (num_ret, ret_arr): the count returned by bitvecdb and the output
        intArray, which is sized to the number of candidates.
    """
    num_cands, cands_arr = self.get_plen_irecs(idb, plen)
    ret_arr = bitvecdb.intArray(num_cands)
    hd_arr = utils.convert_intvec_to_arr(hd_thresh)
    qbits_arr = utils.convert_charvec_to_arr(l_qbits)
    num_ret = bitvecdb.get_thresh_recs_by_list(
        self.__hcbdb, ret_arr, plen, hd_arr, cands_arr, num_cands, qbits_arr)
    return num_ret, ret_arr
def get_rperms_with_eid_at(self, idb, eid, pos, num_cands, cands_arr):
    """Call bitvecdb.get_irecs_with_eid_by_list() for eid at pos over a candidate list.

    The bits for eid come from self.__el_nlb_mgr.get_bin_by_id() and are
    converted to a char array of bitvec_size entries.

    Returns:
        (num_ret, irec_arr): the count returned by bitvecdb and the output
        intArray, which is sized to num_cands.
    """
    eid_bits = self.__el_nlb_mgr.get_bin_by_id(eid)
    eid_bits_arr = utils.convert_charvec_to_arr(eid_bits, bitvec_size)
    irec_arr = bitvecdb.intArray(num_cands)
    num_ret = bitvecdb.get_irecs_with_eid_by_list(
        self.__hcbdb, irec_arr, idb, pos, cands_arr, num_cands, eid_bits_arr)
    return num_ret, irec_arr
def get_rec_rule_names(self, nd_cent, hd_thresh, plen, num_recs, l_rule_names):
    """Map the perms returned by bitvecdb.get_cluster() to entries of l_rule_names.

    Args:
        nd_cent: object with a tolist() method (e.g. an ndarray) holding
            the centroid bits.
        hd_thresh: sequence of ints converted to an int array.
        plen: forwarded to bitvecdb.get_cluster().
        num_recs: size of the output buffer, also forwarded to bitvecdb.
        l_rule_names: sequence indexed by rphrase.

    Returns:
        A list with one l_rule_names entry per returned perm, looked up as
        perm -> iphrase -> rphrase -> l_rule_names[rphrase].
    """
    iperm_arr = bitvecdb.intArray(num_recs)
    cent_arr = utils.convert_charvec_to_arr(nd_cent.tolist())
    hd_arr = utils.convert_intvec_to_arr(hd_thresh)
    num_ret = bitvecdb.get_cluster(self.__hcbdb, iperm_arr, num_recs, cent_arr, plen, hd_arr)

    l_ret = []
    for iperm in range(num_ret):
        iphrase = self.__l_perm_iphrase[iperm_arr[iperm]]
        rphrase = self.__l_phrase_rphrases[iphrase]
        l_ret.append(l_rule_names[rphrase])
    return l_ret
def get_irecs_with_eid(self, idb, eid, rphrase_src, l_rphrase_excl):
    """Return the set of rphrases whose records bitvecdb.get_irecs_with_eid() reports for eid.

    Each returned record is mapped perm -> iphrase -> rphrase. rphrase_src
    and every entry of l_rphrase_excl are left out of the result.

    Args:
        idb: forwarded to bitvecdb.
        eid: element id; its bits come from self.__el_nlb_mgr.get_bin_by_id().
        rphrase_src: rphrase to exclude.
        l_rphrase_excl: list of further rphrases to exclude.
    """
    irec_arr = bitvecdb.intArray(len(self.__l_rperms))
    eid_bits = self.__el_nlb_mgr.get_bin_by_id(eid)
    # -1 is passed in the position slot of the bitvecdb call.
    num_ret = bitvecdb.get_irecs_with_eid(
        self.__hcbdb, irec_arr, idb, -1,
        utils.convert_charvec_to_arr(eid_bits, bitvec_size))

    s_rphrases_close = set()
    for iret in range(num_ret):
        iphrase = self.__l_perm_iphrase[irec_arr[iret]]
        rphrase = self.__l_phrase_rphrases[iphrase]
        if rphrase not in [rphrase_src] + l_rphrase_excl:
            s_rphrases_close.add(rphrase)
    return s_rphrases_close
def convert_intvec_to_arr(bin, size=-1):
    """Copy a sequence into a new bitvecdb.intArray, casting every item to int.

    Args:
        bin: indexable sequence of values accepted by int().
        size: number of leading items to copy; the default -1 means the
            whole sequence.

    Returns:
        A bitvecdb.intArray of `size` entries.
    """
    count = len(bin) if size == -1 else size
    int_arr = bitvecdb.intArray(count)
    for idx in range(count):
        int_arr[idx] = int(bin[idx])
    return int_arr
def get_plen_irecs(self, idb, plen):
    """Call bitvecdb.get_plen_irecs() for idb and plen.

    The output buffer holds as many ints as there are entries in
    self.__l_rperms.

    Returns:
        (num_ret, irec_arr): the count returned by bitvecdb and the output
        intArray it filled.
    """
    irec_arr = bitvecdb.intArray(len(self.__l_rperms))
    return bitvecdb.get_plen_irecs(self.__hcbdb, irec_arr, plen, idb), irec_arr
def find_var_opts(self, idb, irule, num_var_opts, rperm, var_obj_parent, calc_level):
    """Run the bitvecdb var-opt pass for one rule and package it as a cl_var_match_opts.

    Steps, in order: fetch the rule's variable options with
    matching_rule_get_opt(), initialise self.__hvos with init_vo(),
    register every option with add_ext_var(), run do_vo(), then read the
    match phrases and combos back out of self.__hvos.

    A match phrase whose elements are all rec_def_type.obj is stored as an
    nt_match_phrases; any other phrase is recorded as an "open" phrase of
    [def_type, value(, hd)] entries. Combos are kept only when every
    phrase they reference is an all-obj phrase, and are re-indexed into
    the list of stored match phrases.

    Returns:
        cl_var_match_opts built from irule, the stored match phrases, the
        kept combos, var_obj_parent, calc_level + 1,
        self.__l_bresults[irule] and the open phrases.
    """
    print('irule', irule, 'num vars ret', num_var_opts, 'for rperm', rperm)
    hvos = self.__hvos
    iel_ret = bitvecdb.intArray(num_var_opts)
    ivar_ret = bitvecdb.intArray(num_var_opts)
    src_iphrase_ret = bitvecdb.intArray(num_var_opts)
    src_iel_ret = bitvecdb.intArray(num_var_opts)
    bitvecdb.matching_rule_get_opt(
        self.__hcdb_rules, self.__phraseperms.get_bdb_all_hcdb(),
        self.__el_bitvec_mgr.get_hcbdb(), irule, rperm,
        iel_ret, ivar_ret, src_iphrase_ret, src_iel_ret, num_var_opts, True, -1)
    bitvecdb.init_vo(hvos, irule, idb, -1, rperm)
    for ivar in range(num_var_opts):
        print('iel', iel_ret[ivar], 'ivar', ivar_ret[ivar], 'src iphrase', src_iphrase_ret[ivar], 'src iel', src_iel_ret[ivar])
        bitvecdb.add_ext_var(hvos, ivar_ret[ivar], True, True, iel_ret[ivar], 0, ivar)
    bitvecdb.do_vo(hvos)

    c_l_match_phrases = []
    # For each bitvecdb match phrase: its index in c_l_match_phrases, or -1
    # when the phrase was not all-obj and therefore not stored there.
    l_map_to_obj_only = []
    l_open_phrases = []
    num_c_match_phrases = bitvecdb.get_num_match_phrases(hvos)
    num_rule_stages = bitvecdb.get_rule_num_phrases(hvos)
    for imatch in range(num_c_match_phrases):
        istage = bitvecdb.get_match_phrase_istage(hvos, imatch)
        b_matched = bool(bitvecdb.get_match_phrase_b_matched(hvos, imatch))
        num_phrase_els = bitvecdb.get_num_phrase_els(hvos, imatch)
        match_phrase = []
        open_phrase = []
        b_all_obj = True
        for iel in range(num_phrase_els):
            def_type = def_type_table[bitvecdb.get_phrase_el_def_type(hvos, imatch, iel)]
            el_eid = bitvecdb.get_phrase_el_val(hvos, imatch, iel)
            if el_eid == -1:
                el_text = '(not found)'
            else:
                el_text = self.__el_bitvec_mgr.get_el_by_eid(el_eid)
            el_hd = bitvecdb.get_phrase_el_hd(hvos, imatch, iel)
            match_phrase.append(el_text)
            if def_type == rec_def_type.obj:
                open_phrase.append([rec_def_type.obj, el_text])
            elif def_type == rec_def_type.like:
                open_phrase.append([rec_def_type.like, el_text, el_hd])
                b_all_obj = False
            else:
                assert False, 'only rec_def_type obj and like should be possible in find_var_opts()'
        if not b_all_obj:
            l_map_to_obj_only.append(-1)
            # NOTE(review): indentation was lost in the source; this append is
            # assumed to belong to the not-all-obj branch only - confirm.
            l_open_phrases.append(open_phrase)
            continue
        l_map_to_obj_only.append(len(c_l_match_phrases))
        # A phrase is a result phrase only for result rules, at the last stage.
        c_l_match_phrases.append(nt_match_phrases(
            istage=istage, b_matched=b_matched, phrase=match_phrase,
            b_result=self.__l_bresults[irule] and (istage == (num_rule_stages - 1))))

    c_l_match_iphrase_combos = []
    num_c_combos = bitvecdb.get_num_combos(hvos)
    c_combo_len = bitvecdb.get_combo_len(hvos)
    for icombo in range(num_c_combos):
        one_combo = [
            l_map_to_obj_only[bitvecdb.get_combo_val(hvos, icombo, ival)]
            for ival in range(c_combo_len)
        ]
        # Drop combos that reference a phrase with no all-obj counterpart.
        if -1 not in one_combo:
            c_l_match_iphrase_combos.append(one_combo)

    return cl_var_match_opts(
        irule, c_l_match_phrases, c_l_match_iphrase_combos, var_obj_parent,
        calc_level + 1, self.__l_bresults[irule], l_open_phrases)
def run_one_rule(self, irule, src_rperm, result_words, mpdbs, idb, num_var_opts):
    """Run one rule through bitvecdb and collect the eids of its matched result phrases.

    The rule's variable options are fetched with matching_rule_get_opt(),
    self.__hvos is initialised with init_vo(), every option is registered
    with add_ext_var() and the rule is executed with run_rule(). The
    legacy pure-Python matching code that used to follow the final return
    was unreachable and has been removed; all matching happens inside the
    bitvecdb library.

    Args:
        irule: index of the rule to run.
        src_rperm: source perm forwarded to bitvecdb.
        result_words: unused; kept so existing callers keep working.
        mpdbs: unused; kept so existing callers keep working.
        idb: database index forwarded to init_vo().
        num_var_opts: number of variable options to fetch and register.

    Returns:
        (b_has_result, num_matched, ll_result_eids). num_matched is the
        value bitvecdb.run_rule() wrote into its one-int output buffer.
        ll_result_eids holds one list of eids per match phrase that
        belongs to the rule's last stage and is flagged as matched; it is
        empty when run_rule() reports no result. An eid of -1 is passed
        through unchanged.
    """
    iel_ret = bitvecdb.intArray(num_var_opts)
    ivar_ret = bitvecdb.intArray(num_var_opts)
    src_iphrase_ret = bitvecdb.intArray(num_var_opts)
    src_iel_ret = bitvecdb.intArray(num_var_opts)
    bitvecdb.matching_rule_get_opt(
        self.__hcdb_rules, self.__phraseperms.get_bdb_all_hcdb(),
        self.__el_bitvec_mgr.get_hcbdb(), irule, src_rperm,
        iel_ret, ivar_ret, src_iphrase_ret, src_iel_ret, num_var_opts, False, 0)
    bitvecdb.init_vo(self.__hvos, irule, idb, -1, src_rperm, False, 0)
    for ivar in range(num_var_opts):
        print('iel', iel_ret[ivar], 'ivar', ivar_ret[ivar], 'src iphrase', src_iphrase_ret[ivar], 'src iel', src_iel_ret[ivar])
        bitvecdb.add_ext_var(self.__hvos, ivar_ret[ivar], True, True, iel_ret[ivar], 0, ivar)

    # run_rule() writes the match count into a one-element output buffer.
    num_matched_ret = bitvecdb.intArray(1)
    b_has_result = bitvecdb.run_rule(self.__hvos, num_matched_ret)
    if not b_has_result:
        return False, num_matched_ret[0], []

    num_match_phrases = bitvecdb.get_num_match_phrases(self.__hvos)
    num_rule_stages = bitvecdb.get_rule_num_phrases(self.__hvos)
    # Only phrases of the rule's last stage are collected as results.
    result_iphrase = num_rule_stages - 1
    ll_result_eids = []
    for imatch in range(num_match_phrases):
        istage = bitvecdb.get_match_phrase_istage(self.__hvos, imatch)
        if istage != result_iphrase:
            continue
        if not bool(bitvecdb.get_match_phrase_b_matched(self.__hvos, imatch)):
            continue
        num_phrase_els = bitvecdb.get_num_phrase_els(self.__hvos, imatch)
        l_eids = []
        for iel in range(num_phrase_els):
            i_def_type = bitvecdb.get_phrase_el_def_type(self.__hvos, imatch, iel)
            def_type = def_type_table[i_def_type]
            assert def_type == rec_def_type.obj, 'Error! Run rule should produce matched phrases with b_match that has only rec_def_type.obj'
            l_eids.append(bitvecdb.get_phrase_el_val(self.__hvos, imatch, iel))
        ll_result_eids.append(l_eids)
    return b_has_result, num_matched_ret[0], ll_result_eids