def prediction(self, valid_set=False, extend=False):
    """Apply the learned rules and write ranked head/tail predictions.

    Parameters
    ----------
    valid_set : bool
        When true (and ``extend`` is false), predictions are produced for
        the validation triples instead of the test triples and written to
        ``predictions/<datasets>/predict_valid_1000.txt``.
    extend : bool
        When true, the rules from ``cfg['path_rules_ext']`` are appended to
        the base rules and the output file gets an ``ext_`` prefix. This
        branch takes precedence over ``valid_set``.
    """
    # The loaded triple sets get their own names. Previously the validation
    # data was bound to ``valid_set`` and silently replaced the caller's
    # flag, so the ``elif valid_set`` branch no longer tested the argument.
    training_triples, test_triples, valid_triples = TripleSet(), TripleSet(), TripleSet()
    training_triples.read_triples(self.cfg['path_training'])
    test_triples.read_triples(self.cfg['path_test'])
    valid_triples.read_triples(self.cfg['path_valid'])

    path_rules_used = self.cfg['path_rules']
    start_time = current_milli_time()

    # NOTE(review): assumes the rule path has at least three '/'-separated
    # parts, with the file name in the third position -- confirm against cfg.
    tmp_path = path_rules_used.split('/')
    predict_name = tmp_path[2].replace('rule', 'predict')
    path_output_used = 'predictions/{}/{}'.format(self.datasets, predict_name)
    self.log.info('rules learning: {}'.format(path_rules_used))
    self.log.info('output learning: {}'.format(path_output_used))

    rules = RuleReader(path_rules_used).read()
    if extend:
        rules_exd = RuleReader(self.cfg['path_rules_ext']).read()
        rules.extend(rules_exd)
        path_output_used = 'predictions/{}/ext_{}'.format(self.datasets, predict_name)
        # Extended runs are evaluated on the validation triples.
        test_triples, valid_triples = valid_triples, test_triples
    elif valid_set:
        path_output_used = 'predictions/{}/predict_valid_1000.txt'.format(self.datasets)
        test_triples, valid_triples = valid_triples, test_triples

    rules_size = len(rules)
    print('*** read rules {} rom file {}'.format(rules_size, path_rules_used))
    rule_engine = RuleEngine(path_output_used, self.cfg['unseen_nagative_examples'])
    rule_engine.apply_rules_arx(rules, training_triples, test_triples,
                                valid_triples, self.cfg['top_k_output'])
    print('* evaluated {} rules to propose candiates for {} *2 completion tasks'.format(
        rules_size, len(test_triples.triples)))
    print('* finished in {} ms.'.format(current_milli_time() - start_time))
    self.log.info('finished in {} s.'.format((current_milli_time() - start_time) // 1000))
def progress_update(): nonlocal best, x0, nfeval, ngeval, last_shown if show_progress: if current_milli_time() - last_shown > 500: ss = np.float128(best).astype(str) ss += ' '*(20-len(ss)) out = '\rNo. function evals %6d \tNo. grad evals %6d \tBest value %s'%(nfeval,ngeval,ss) sys.stdout.write(out) sys.stdout.flush() last_shown = current_milli_time()
def __init__(self, datasets='WN18'):
    """Load the prediction config for ``datasets`` and set up logging.

    Also switches ``Rule`` into application mode via
    ``Rule.set_application_mode()``.
    """
    self.cfg = Config.load_predict_config(datasets)
    self.datasets = datasets

    logger = Logger.get_log_cate('prediction.txt', 'Predict')
    self.log = logger
    banner = '****************************start new section*************************************'
    logger.info(banner)
    logger.info('initialize learning {}'.format(current_milli_time()))

    Rule.set_application_mode()
def __init__(self, dataset='WN18'):
    """Set up logging, load the learning config and read the training triples."""
    logger = Logger.get_log_cate('learning.txt', 'Learning')
    self.log = logger
    self.cfg = Config.load_learning_config(dataset)

    banner = '****************************start new section*************************************'
    logger.info(banner)
    logger.info('initialize learning {}'.format(current_milli_time()))

    # Training triples come from the path given in the loaded config.
    triples = TripleSet()
    self.triple_set = triples
    triples.read_triples(self.cfg['path_training'])
def save_scores(self):
    """Append a snapshot of the current per-type scores and frequencies.

    A new row of ``supported_types`` zeros is appended to both
    ``self.scores`` and ``self.freqs``, together with a timestamp. For each
    entry of ``self.current_scores`` the row stores the frequency and the
    score divided by that frequency (divisor floored at 1).
    """
    width = self.supported_types
    score_row = [0] * width
    freq_row = [0] * width
    self.scores.append(score_row)
    self.freqs.append(freq_row)
    self.timestamps.append(current_milli_time())

    for idx, total in enumerate(self.current_scores):
        freq_row[idx] = self.current_freqs[idx]
        # max(..., 1) avoids dividing by a zero frequency.
        score_row[idx] = total / max(freq_row[idx], 1)
def __init__(self, output_path, unseen_nagative_example):
    """Create an empty output file and set up the rule-engine logger.

    Any existing file at ``output_path`` is removed first, then an empty
    file is created in its place.
    """
    if os.path.exists(output_path):
        os.remove(output_path)
    # Create (or truncate) the file without writing anything.
    open(output_path, 'w').close()

    self.output_path = output_path
    self.unseen_nagative_example = unseen_nagative_example

    logger = Logger.get_log_cate('rule_engine.txt', 'RuleEngine')
    self.log = logger
    banner = '****************************start new section*************************************'
    logger.info(banner)
    logger.info('initialize rule engine {}'.format(current_milli_time()))
def train(self):
    """Mine rules from sampled paths until the last snapshot is written.

    The loop alternates between acyclic and cyclic mining. Each batch
    samples paths for ``cfg['batch_time']`` milliseconds, generalizes them
    into rules and keeps those meeting the confidence and
    correct-prediction thresholds. When a batch mostly rediscovers known
    rules (coverage above ``cfg['saturation']``), the rule size for the
    current mode is increased. Whenever the elapsed time passes the next
    entry of ``cfg['snapshots_at']``, a deep copy of all rules is handed to
    ``self.process_snapshot_rule`` on a separate thread.

    Returns
    -------
    int
        ``0`` once the final snapshot thread has been joined.
    """
    triple_set = self.triple_set
    index_start_time = current_milli_time()
    self.log.info('training with config {}'.format(self.cfg))
    path_sampler = PathSampler(triple_set)
    path_counter, batch_counter = 0, 0
    mine_cyclic_not_acyclic = False
    # all_useful_rules[n] is the rule set used while rule_size == n; cyclic
    # and acyclic mining of the same size share one set.
    all_useful_rules = [set()]
    snapshot_index, rule_size_cyclic, rule_size_acyclic = 0, 0, 0
    # Bookkeeping only: these are recorded per mode but not read below.
    last_cyclic_coverage, last_acyclic_coverage = 0.0, 0.0
    self.log.info('indexing dataset: {}'.format(self.cfg['path_training']))
    self.log.info('time elapsed: {} ms'.format(current_milli_time() - index_start_time))
    snapshots_at = self.cfg['snapshots_at']
    dataset = self.cfg['dataset']
    batch_time = self.cfg['batch_time']
    min_confidence = self.cfg['threshold_confidence']
    min_correct = self.cfg['threshold_correct_predictions']
    start_time = current_milli_time()

    while True:
        batch_previously_found_rules, batch_new_useful_rules, batch_rules = 0, 0, 0
        rule_size = rule_size_cyclic if mine_cyclic_not_acyclic else rule_size_acyclic
        useful_rules = all_useful_rules[rule_size]
        elapsed_seconds = (current_milli_time() - start_time) // 1000

        # Snapshot the rules learned so far once the next time mark is passed.
        if elapsed_seconds > snapshots_at[snapshot_index]:
            total_rule = sum(len(_rules) for _rules in all_useful_rules)
            snapshot_file = 'learning_rules/{}/rule_{}.txt'.format(
                dataset, snapshots_at[snapshot_index])
            snapshot_index += 1
            self.log.info('snapshot_rules: {} in file {}'.format(
                total_rule, snapshot_file))
            # Deep copy so mining can continue while the snapshot thread runs.
            snapshot_rules = copy.deepcopy(all_useful_rules)
            thread_snapshot = threading.Thread(
                target=self.process_snapshot_rule,
                args=(snapshot_rules, snapshot_file))
            thread_snapshot.start()
            print('created snapshot {} after {} seconds'.format(
                snapshot_index, elapsed_seconds))
            if snapshot_index == len(snapshots_at):
                print('*************************done learning*********************************')
                thread_snapshot.join()
                return 0

        # Batch learning: sample paths until the batch time budget is spent.
        batch_start_time = current_milli_time()
        while True:
            if current_milli_time() - batch_start_time > batch_time:
                break
            path_counter += 1
            path = path_sampler.sample_path(rule_size + 2, mine_cyclic_not_acyclic)
            if path is not None and path.is_valid():
                rule = Rule()
                rule.init_from_path(path)
                gen_rules = rule.get_generalizations(mine_cyclic_not_acyclic)
                for r in gen_rules:
                    if r.is_trivial():
                        continue
                    batch_rules += 1
                    if r not in useful_rules:
                        r.compute_scores(triple_set)
                        if (r.confidence >= min_confidence
                                and r.correctly_predicted >= min_correct):
                            batch_new_useful_rules += 1
                            useful_rules.add(r)
                    else:
                        batch_previously_found_rules += 1

        batch_counter += 1
        str_type = 'CYCLIC' if mine_cyclic_not_acyclic else 'ACYCLIC'
        print('=====> batch [{} {}] {} (sampled {} pathes) *****'.format(
            str_type, rule_size + 1, batch_counter, path_counter))

        # A batch may yield no useful rules at all; treat that as zero
        # coverage instead of dividing by zero.
        batch_useful_total = batch_new_useful_rules + batch_previously_found_rules
        if batch_useful_total:
            current_coverage = batch_previously_found_rules / batch_useful_total
        else:
            current_coverage = 0.0
        print(
            '=====> fraction of previously seen rules within useful rules in this batch: {} num of new rule = {} num of previously rule = {} num of all batch rules = {}'
            .format(current_coverage, batch_new_useful_rules,
                    batch_previously_found_rules, batch_rules))
        print('=====> stored rules: {}'.format(len(useful_rules)))

        if mine_cyclic_not_acyclic:
            last_cyclic_coverage = current_coverage
        else:
            last_acyclic_coverage = current_coverage

        if (current_coverage > self.cfg['saturation']
                and batch_previously_found_rules > 1):
            rule_size += 1
            if mine_cyclic_not_acyclic:
                rule_size_cyclic = rule_size
            else:
                rule_size_acyclic = rule_size
            print('=========================================================')
            print('=====> increasing rule size of {} rule to {}'.format(
                str_type, rule_size + 1))
            self.log.info(
                'increasing rule size of {} rules to {} after {} s'.format(
                    str_type, rule_size + 1,
                    (current_milli_time() - start_time) // 1000))
            # Keep the list long enough for the new rule_size index.
            all_useful_rules.append(set())

        # Alternate modes, but stay acyclic once cyclic rules hit max length.
        mine_cyclic_not_acyclic = not mine_cyclic_not_acyclic
        if (mine_cyclic_not_acyclic
                and rule_size_cyclic + 1 > self.cfg['max_length_cylic']):
            mine_cyclic_not_acyclic = False
def apply_rules_arx(self, rules, training_set, test_set, validation_set, k):
    """Rank head and tail candidates for every test triple using ``rules``.

    For each triple in ``test_set`` the rules indexed under its relation
    are applied in order until the tail (respectively head) score tree
    reports ``fine()``. The top-``k`` candidates for both directions are
    then written out through ``__process_write_top_k_candidates``.
    """
    print('* applying rules')
    relation_to_rules = self.create_ordered_rule_index(rules)
    print('* set up index structure covering rules for {} different relations'
          .format(len(relation_to_rules)))

    # Known triples from all three splits are used to filter candidates.
    filter_set = TripleSet()
    for known in (training_set, test_set, validation_set):
        filter_set.add_triple_set(known)
    print('* constructed filter set with {} triples'.format(len(filter_set.triples)))
    if len(filter_set.triples) == 0:
        print('WARNING: using empty filter set!')

    start_time = current_milli_time()
    ScoreTree.set_lower_bound(k)
    ScoreTree.set_upper_bound(ScoreTree.lower_bound)
    ScoreTree.set_epsilon(0.0001)

    for processed, triple in enumerate(test_set.triples):
        # Progress report every 100 triples; the timer restarts each time.
        if processed % 100 == 0:
            print('* (# {} ) trying to guess the tail/head of {}'.format(processed, triple))
            now = current_milli_time()
            print('Elapsed (s) = {}'.format((now - start_time) // 1000))
            start_time = current_milli_time()

        relation = triple.relation
        head = triple.head
        tail = triple.tail
        tail_tree = ScoreTree()
        head_tree = ScoreTree()

        if relation in relation_to_rules:
            relevant_rules = relation_to_rules.get(relation)

            # Tail prediction: stop as soon as the tree is satisfied.
            for rule in relevant_rules:
                if tail_tree.fine():
                    break
                raw_tails = rule.compute_tail_results(head, training_set)
                kept_tails = self.__get_filtered_entities(
                    filter_set, test_set, triple, raw_tails, True)
                tail_tree.add_values(rule.get_applied_confidence(), kept_tails)

            # Head prediction, same scheme in the other direction.
            for rule in relevant_rules:
                if head_tree.fine():
                    break
                raw_heads = rule.compute_head_results(tail, training_set)
                kept_heads = self.__get_filtered_entities(
                    filter_set, test_set, triple, raw_heads, False)
                head_tree.add_values(rule.get_applied_confidence(), kept_heads)

        tail_scores, head_scores = {}, {}
        tail_tree.get_as_linked_map(tail_scores)
        head_tree.get_as_linked_map(head_scores)
        best_tails = self.__sort_by_value(tail_scores, k)
        best_heads = self.__sort_by_value(head_scores, k)

        # The writer thread is joined right away, so output stays in order.
        writer = threading.Thread(
            target=self.__process_write_top_k_candidates,
            args=(triple, test_set, best_tails, best_heads))
        writer.start()
        writer.join()

    print('* done with rule application')
def filter_moments(stim,Y,A,beta,C,m,
    dt          = 1.0,
    oversample  = 10,
    maxrate     = 500,
    maxvcorr    = 2000,
    method      = "moment_closure",
    int_method  = "euler",
    measurement = "moment",
    reg_cov     = 0,
    reg_rate    = 0,
    return_surrogates  = False,
    use_surrogates     = None,
    initial_conditions = None,
    progress           = False,
    safe               = True):
    '''
    Filter point-process observations with a moment-closure approximation
    of the history process, alternating forward integration of the moments
    with a measurement update at every time step.

    Parameters
    ----------
    stim : zero-lag effective input (filtered stimulus plus mean offset)
    Y : point-process count observations, same length as stim
    A : forward operator for delay-line evolution
    C : projection of current state onto delay-line
    beta : basis history weights
    m : log-rate bias parameter, log-rates are regularized toward this value

    Other Parameters
    ----------------
    dt : time step
    oversample : int
        Integration steps per time step; must be at least 1. Should be
        larger if using Gaussian moment closure, which is stiff. Can be
        small if using second-order approximations, which are less stiff.
    maxrate : maximum rate tolerated
    maxvcorr: Maximum variance correction ('convexity correction' in some
        literature) tolerated during the moment closure.
    method : Moment-closure method. Can be "LNA" for mean-field with linear
        noise approximation, "moment_closure" for Gaussian moment-closure
        on the history process, or "second_order", which discards higher
        moments of the rate which emerge when exponentiating.
    int_method: Integration method. Can be either "euler" for
        forward-Euler, or "exponential", which integrates the
        locally-linearized system forward using matrix exponentiation
        (slower).
    measurement: "moment", "laplace", or "variational"
    reg_cov: Diagonal covariance regularization
    reg_rate: Small regularization toward log mean-rate; This parameter
        reflects the precision of a Gaussian prior about the log mean-rate,
        applied at every measurement update.
    return_surrogates: bool
        If true, Gaussian approximations of measurement likelihoods are
        returned.
    use_surrogates: None or tuple
        Can be set as tuple of (means, variances) for Gaussian
        approximations of measurement updates.
    initial_conditions: None or tuple
        Can be set to a tuple (M1,M2) of initial conditions for moment
        filtering.
    progress: boolean
        Whether to report progress
    safe: boolean
        If true, moments are clipped to [-100, 100] after each update and
        filtering stops early (with ``nll = inf``) when the failure
        heuristic triggers.

    Returns
    -------
    allLR : single-time marginal mean of log-rate
    allLV : single-time marginal variance of log-rate
    allM1 : low-dimensional approximation of history process, mean
    allM2 : low-dimensional approximation of history process, covariance
    nll   : negative log-likelihood
    allmr, allvr : only when ``return_surrogates`` is true; per-step
        surrogate values returned by the measurement update

    Raises
    ------
    ValueError
        If ``oversample`` is smaller than 1.
    '''
    # check arguments
    stim = asvector(stim)
    Y    = asvector(Y)
    A    = assquare(A)
    if oversample<1:
        raise ValueError('oversample must be a positive integer')
    if method=="moment_closure" and measurement=="variational":
        warnings.warn("There are unresolved numerical stability issues "
            "when using the log-Gaussian variational update with Gaussian "
            "moment closure. Suggest using the second-order moment closure "
            "instead")

    # Precompute constants
    maxlogr = np.log(maxrate)
    dtfine  = dt/oversample
    T       = len(stim)
    K       = beta.size
    Cb      = C.dot(beta.T)
    CC      = C.dot(C.T)
    Adt     = A*dtfine

    if use_surrogates is not None:
        MR,VR = use_surrogates

    # Get measurement update function
    measurement = get_measurement(measurement)
    # Build moment integrator functions
    mean_update, cov_update = get_moment_integrator(int_method,Adt)
    # Get update function (computes expected rate from moments)
    update = get_update_function(method,Cb,Adt,maxvcorr)

    # Accumulate negative log-likelihood up to a constant
    nll = 0
    llrescale = 1.0/len(stim)

    if initial_conditions is None:
        # Initial condition for moments
        M1 = np.zeros((K,1))
        M2 = np.eye(K)*1e-6
    else:
        M1,M2 = initial_conditions

    # Store moments
    allM1 = np.zeros((T,K))
    allM2 = np.zeros((T,K,K))
    allLR = np.zeros((T))
    allLV = np.zeros((T))
    allmr = np.zeros((T))
    allvr = np.zeros((T))

    if progress:
        last_shown = current_milli_time()

    for i,s in enumerate(stim):
        # Regularize: symmetrize and lift the diagonal, adding extra
        # strength when the smallest diagonal entry is negative
        if reg_cov>0:
            strength = reg_cov+max(0,-np.min(np.diag(M2)))
            M2 = 0.5*(M2+M2.T) + strength*np.eye(K)

        # Integrate moments forward
        for _ in range(oversample):
            logv = beta.T.dot(M2).dot(beta)
            logx = min(beta.T.dot(M1)+s,maxlogr)
            R0   = sexp(logx)
            R0   = min(maxrate,R0)
            R0  *= dtfine
            Rm,J = update(logx,logv,R0,M1,M2)
            M2   = cov_update(M2,J) + CC*Rm
            M1   = mean_update(M1)  + C *Rm
            if safe:
                M1 = np.clip(M1,-100,100)
                M2 = np.clip(M2,-100,100)

        # Measurement update
        if use_surrogates is None:
            # Use specified approximation method to handle non-conjugate
            # log-Gaussian Poisson measurement update
            M1,M2,ll,mr,vr = measurement_update_projected_gaussian(
                M1,M2,Y[i],beta,s,dt,m,reg_rate,measurement)
            allmr[i] = mr
            allvr[i] = vr
        else:
            # Use specified Gaussian approximations (MR,VR) to the
            # measurement likelihoods.
            M1,M2,ll = measurement_update_projected_gaussian_surrogate(
                M1,M2,Y[i],beta,s,dt,m,reg_rate,measurement,
                return_surrogate=False,
                surrogate=(MR[i],VR[i]))
        if safe:
            M1 = np.clip(M1,-100,100)
            M2 = np.clip(M2,-100,100)

        # Store moments
        allM1[i] = M1[:,0].copy()
        allM2[i] = M2.copy()
        allLR[i] = min(beta.T.dot(M1)+s,maxlogr)
        allLV[i] = beta.T.dot(M2).dot(beta)
        nll -= ll*llrescale

        if safe:
            # Heuristic: detect numerical failure and exit early.
            # The comparison must happen element-wise inside np.any;
            # comparing the boolean result of np.any(M1) with -1e5 is
            # always False.
            failed = np.any(M1<-1e5)
            failed|= logx>100*maxlogr
            failed|= nll<-1e10
            if failed:
                nll = np.inf
                break

        if progress and current_milli_time()-last_shown>500:
            sys.stdout.write('\r%02.02f%%'%(i*100/T))
            sys.stdout.flush()
            last_shown = current_milli_time()

    if progress:
        sys.stdout.write('\r100.00%')
        sys.stdout.flush()
        sys.stdout.write('\n')

    if return_surrogates:
        return allLR,allLV,allM1,allM2,nll,allmr,allvr
    else:
        return allLR,allLV,allM1,allM2,nll
def minimize_retry(objective,initial,jac=None,hess=None,
                   verbose=False,
                   printerrors=True,
                   failthrough=True,
                   tol=1e-5,
                   simplex_only=False,
                   show_progress=True,
                   **kwargs):
    '''
    Call `scipy.optimize.minimize`, retrying a few times in case
    one solver doesn't work.

    This addresses unresolved bugs that can cause exceptions in some of
    the gradient-based solvers in Scipy. If we happen upon these bugs,
    we can continue optimization using slower but more robust methods.

    Ultimately, this routine falls back to the gradient-free Nelder-Mead
    simplex algorithm, although it will try to use faster routines if
    the hessian and gradient are provided.

    Parameters
    ----------
    objective : callable
        Function to minimize. If `jac` is True it must return the pair
        (value, gradient); otherwise it returns the value only.
    initial : array-like
        Starting point; flattened to a 1-D array.
    jac : None, False, True, or callable
        Gradient specification, forwarded to `scipy.optimize.minimize`.
    hess : optional
        Hessian specification, forwarded to `scipy.optimize.minimize`.
    verbose, printerrors : bool
        Control what is written to stderr on solver failure.
    failthrough : bool
        If True, return the best point found even if every solver failed.
    tol : float
        Tolerance forwarded to `scipy.optimize.minimize`.
    simplex_only : bool
        If True, skip the gradient-based solvers and use Nelder-Mead only.
    show_progress : bool
        If True, print a one-line progress report to stdout.
    **kwargs
        Solver options; each solver only receives the options it accepts.

    Returns
    -------
    x0 : np.ndarray
        The parameters with the lowest finite objective value observed.

    Raises
    ------
    ArithmeticError
        If all attempts failed and `failthrough` is False.
    '''
    # Track the best point seen so we can keep it, even if a solver crashes
    x0 = np.array(initial).ravel()
    nfeval = 0
    ngeval = 0
    if jac is True:
        v,_ = objective(x0)
    else:
        v = objective(x0)
    # If the starting value is NaN, `v < best` would never be true and the
    # best point would stay stuck at `initial`; start from +inf instead.
    best = v if np.isfinite(v) else np.inf

    # Show progress of the optimization?
    last_shown = 0
    if show_progress:
        sys.stdout.write('\n')
        last_shown = current_milli_time()

    def progress_update():
        nonlocal last_shown
        if show_progress:
            if current_milli_time() - last_shown > 500:
                # np.longdouble exists on every platform; np.float128 is
                # only an alias for it where the platform provides one.
                ss = np.longdouble(best).astype(str)
                ss += ' '*(20-len(ss))
                out = '\rNo. function evals %6d \tNo. grad evals %6d \tBest value %s'%(nfeval,ngeval,ss)
                sys.stdout.write(out)
                sys.stdout.flush()
                last_shown = current_milli_time()

    def clear_progress():
        if show_progress:
            progress_update()
            sys.stdout.write('\n')
            sys.stdout.flush()

    # Wrap the provided gradient and objective functions, so that we can
    # capture the function values as they are being optimized. This way,
    # if optimization throws an exception, we can still remember the best
    # value it obtained, and resume optimization from there using a slower
    # but more reliable method. These wrapper functions also act as
    # callbacks and allow us to print the optimization progress on screen.
    # The best point is stored as a copy, since the solver may hand us a
    # buffer that it later modifies in place.
    if jac is True:
        def wrapped_objective(params):
            nonlocal best, x0, nfeval, ngeval
            v,g = objective(params)
            if np.isfinite(v) and v<best:
                best = v
                x0 = np.array(params)
            nfeval += 1
            ngeval += 1
            progress_update()
            return v,g
    else:
        def wrapped_objective(params):
            nonlocal best, x0, nfeval
            v = objective(params)
            if np.isfinite(v) and v<best:
                best = v
                x0 = np.array(params)
            nfeval += 1
            progress_update()
            return v

    if hasattr(jac, '__call__'):
        # Jacobian is a function
        original_jac = jac
        def wrapped_jacobian(params):
            nonlocal ngeval
            g = original_jac(params)
            ngeval += 1
            progress_update()
            return g
        jac = wrapped_jacobian

    # There are still some unresolved bugs in some of the optimizers that
    # can lead to exceptions and crashes! This routine catches these errors
    # and fails gracefully. Note that system interrupts are not caught,
    # and other unexpected errors are caught but reported, in case they
    # reflect an exception arising from a user-provided gradient or
    # objective function.
    def try_to_optimize(method,validoptions,jac_=None):
        try:
            accepted = validoptions.split()
            options = {k:v for (k,v) in kwargs.items() if k in accepted}
            result = scipy.optimize.minimize(wrapped_objective,x0.copy(),
                jac=jac_,hess=hess,method=method,tol=tol,**options)
            # Make sure the solver's final answer is considered as well
            _ = wrapped_objective(result.x)
            clear_progress()
            if result.success:
                return True
            if verbose or printerrors:
                sys.stderr.write('%s reported "%s"\n'%(method,result.message))
                sys.stderr.flush()
        except (KeyboardInterrupt, SystemExit):
            # Don't catch system interrupts
            raise
        except (TypeError,NameError):
            # Likely an internal bug in scipy; don't report it
            clear_progress()
            return False
        except Exception:
            # Unexpected error, might be user error, report it (once)
            clear_progress()
            if verbose or printerrors:
                sys.stderr.write('Error using minimize with %s:\n'%method)
                sys.stderr.flush()
            traceback.print_exc()
            sys.stderr.flush()
            return False
        return False

    # We try a few different optimizers, in order
    # -- If Hessian is available, Newton-CG should be fast! try it
    # -- Otherwise, BFGS is a fast gradient-only optimizer
    # -- Fall back to Nelder-Mead simplex algorithm if all else fails
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",message='Method Nelder-Mead does not use')
            warnings.filterwarnings("ignore",message='Method BFGS does not use')
            fallback_jac = True if jac is True else None
            # If gradient is provided....
            if jac is not None and jac is not False and not simplex_only:
                if try_to_optimize('Newton-CG','disp xtol maxiter eps',jac_=jac):
                    return x0
                if try_to_optimize('BFGS','disp gtol maxiter eps norm',jac_=jac):
                    return x0
            # Without gradient...
            if not simplex_only:
                if try_to_optimize('BFGS','disp gtol maxiter eps norm',
                    jac_=fallback_jac):
                    return x0
            # Simplex is last resort, slower but robust
            if try_to_optimize('Nelder-Mead',
                    'disp maxiter maxfev initial_simplex xatol fatol',
                    jac_=fallback_jac):
                return x0
    except (KeyboardInterrupt, SystemExit):
        print('Best parameters are %s with value %s'%(v2str_long(x0),best))
        raise
    except Exception:
        traceback.print_exc()
        if not failthrough: raise

    # If we've reached here, it means that all optimizers terminated with
    # an error, or reported a failure to converge. If `failthrough` is
    # set, we can still return the best value found so far.
    if failthrough:
        if verbose:
            sys.stderr.write('Minimization may not have converged\n')
            sys.stderr.flush()
        return x0 # fail through
    raise ArithmeticError('All minimization attempts failed')