Example #1
  def prediction(self, use_valid_set=False, extend=False):
    training_set, test_set, valid_set = TripleSet(), TripleSet(), TripleSet()
    training_set.read_triples(self.cfg['path_training'])
    test_set.read_triples(self.cfg['path_test'])
    valid_set.read_triples(self.cfg['path_valid'])

    path_rules_used = self.cfg['path_rules']
    #for path_rules_used in self.cfg['path_rules']:
    start_time = current_milli_time()
    tmp_path = path_rules_used.split('/')
    path_output_used = 'predictions/{}/{}'.format(self.datasets, tmp_path[2].replace('rule', 'predict'))
    self.log.info('rules learning: {}'.format(path_rules_used))
    self.log.info('output learning: {}'.format(path_output_used))
    rules = RuleReader(path_rules_used).read()
    if extend:
      rules_exd = RuleReader(self.cfg['path_rules_ext']).read()
      rules.extend(rules_exd)
      path_output_used = 'predictions/{}/ext_{}'.format(self.datasets, tmp_path[2].replace('rule', 'predict'))
      test_set, valid_set = valid_set, test_set
    elif use_valid_set:
      path_output_used = 'predictions/{}/predict_valid_1000.txt'.format(self.datasets)
      test_set, valid_set = valid_set, test_set

    rules_size = len(rules)
    print('*** read rules {} from file {}'.format(rules_size, path_rules_used))
    rule_engine = RuleEngine(path_output_used, self.cfg['unseen_nagative_examples'])
    rule_engine.apply_rules_arx(rules, training_set, test_set, valid_set, self.cfg['top_k_output'])
    print('* evaluated {} rules to propose candidates for {} * 2 completion tasks'.format(rules_size, len(test_set.triples)))
    print('* finished in {} ms.'.format(current_milli_time() - start_time))
    self.log.info('finished in {} s.'.format((current_milli_time() - start_time) // 1000))
Example #2
 def progress_update():
     nonlocal best, x0, nfeval, ngeval, last_shown
     if show_progress: 
         if current_milli_time() - last_shown > 500:
             ss = np.float128(best).astype(str)
             ss += ' '*(20-len(ss))
             out = '\rNo. function evals %6d \tNo. grad evals %6d \tBest value %s'%(nfeval,ngeval,ss)
             sys.stdout.write(out)
             sys.stdout.flush()
             last_shown = current_milli_time()
Example #3
 def __init__(self, datasets='WN18'):
   self.cfg = Config.load_predict_config(datasets)
   self.datasets = datasets
   self.log = Logger.get_log_cate('prediction.txt', 'Predict')
   self.log.info('****************************start new section*************************************')
   self.log.info('initialize learning {}'.format(current_milli_time()))
   Rule.set_application_mode()
Example #4
 def __init__(self, dataset='WN18'):
     self.log = Logger.get_log_cate('learning.txt', 'Learning')
     self.cfg = Config.load_learning_config(dataset)
     self.log.info(
         '****************************start new section*************************************'
     )
     self.log.info('initialize learning {}'.format(current_milli_time()))
     self.triple_set = TripleSet()
     self.triple_set.read_triples(self.cfg['path_training'])
Example #5
 def save_scores(self):
     self.scores.append([0 for i in range(self.supported_types)])
     self.freqs.append([0 for i in range(self.supported_types)])
     self.timestamps.append(current_milli_time())
     last_scores = self.scores[-1]
     last_freqs = self.freqs[-1]
     for i in range(len(self.current_scores)):
         last_freqs[i] = self.current_freqs[i]
         last_scores[i] = self.current_scores[i] / max(last_freqs[i], 1)
Example #6
 def __init__(self, output_path, unseen_nagative_example):
     if os.path.exists(output_path):
         os.remove(output_path)
     with open(output_path, 'w') as fp:
         pass
     self.output_path = output_path
     self.unseen_nagative_example = unseen_nagative_example
     self.log = Logger.get_log_cate('rule_engine.txt', 'RuleEngine')
     self.log.info(
         '****************************start new section*************************************'
     )
     self.log.info('initialize rule engine {}'.format(current_milli_time()))
Example #7
    def train(self):
        triple_set = self.triple_set
        index_start_time = current_milli_time()
        self.log.info('training with config {}'.format(self.cfg))
        path_sampler = PathSampler(triple_set)
        path_counter, batch_counter = 0, 0
        mine_cyclic_not_acyclic = False
        all_useful_rules = [set()]
        snapshot_index, rule_size_cyclic, rule_size_acyclic = 0, 0, 0
        last_cyclic_coverage, last_acyclic_coverage = 0.0, 0.0
        self.log.info('indexing dataset: {}'.format(self.cfg['path_training']))
        self.log.info('time elapsed: {} ms'.format(current_milli_time() -
                                                   index_start_time))
        snapshots_at = self.cfg['snapshots_at']
        dataset = self.cfg['dataset']
        start_time = current_milli_time()
        while True:
            batch_previously_found_rules, batch_new_useful_rules, batch_rules = 0, 0, 0
            rule_size = rule_size_cyclic if mine_cyclic_not_acyclic else rule_size_acyclic
            useful_rules = all_useful_rules[rule_size]
            elapsed_seconds = (current_milli_time() - start_time) // 1000
            ## snapshot rules after t seconds while learning
            if elapsed_seconds > snapshots_at[snapshot_index]:
                total_rule = 0
                for _rules in all_useful_rules:
                    total_rule += len(_rules)
                snapshot_file = 'learning_rules/{}/rule_{}.txt'.format(
                    dataset, snapshots_at[snapshot_index])
                snapshot_index += 1
                self.log.info('snapshot_rules: {} in file {}'.format(
                    total_rule, snapshot_file))
                snapshot_rules = copy.deepcopy(all_useful_rules)
                thread_snapshot = threading.Thread(
                    target=self.process_snapshot_rule,
                    args=(
                        snapshot_rules,
                        snapshot_file,
                    ))
                thread_snapshot.start()
                print('created snapshot {} after {} seconds'.format(
                    snapshot_index, elapsed_seconds))
                if snapshot_index == len(snapshots_at):
                    print(
                        '*************************done learning*********************************'
                    )
                    thread_snapshot.join()
                    return 0
            # batch learning
            batch_start_time = current_milli_time()
            while True:
                if current_milli_time() - batch_start_time > self.cfg['batch_time']:
                    break
                path_counter += 1
                path = path_sampler.sample_path(rule_size + 2,
                                                mine_cyclic_not_acyclic)
                if path is not None and path.is_valid():
                    rule = Rule()
                    rule.init_from_path(path)
                    gen_rules = rule.get_generalizations(
                        mine_cyclic_not_acyclic)
                    for r in gen_rules:
                        if r.is_trivial():
                            continue
                        batch_rules += 1
                        if r in useful_rules:
                            # rule was already found in an earlier batch
                            batch_previously_found_rules += 1
                            continue
                        r.compute_scores(triple_set)
                        if (r.confidence >= self.cfg['threshold_confidence']
                                and r.correctly_predicted >=
                                self.cfg['threshold_correct_predictions']):
                            batch_new_useful_rules += 1
                            useful_rules.add(r)

            batch_counter += 1
            str_type = 'CYCLIC' if mine_cyclic_not_acyclic else 'ACYCLIC'
            print('=====> batch [{} {}] {} (sampled {} paths) *****'.format(
                str_type, rule_size + 1, batch_counter, path_counter))
            current_coverage = batch_previously_found_rules / max(
                batch_new_useful_rules + batch_previously_found_rules, 1)
            print(
                '=====> fraction of previously seen rules within useful rules in this batch: {} num of new rule = {} num of previously rule = {} num of all batch rules = {}'
                .format(current_coverage, batch_new_useful_rules,
                        batch_previously_found_rules, batch_rules))
            print('=====> stored rules: {}'.format(len(useful_rules)))
            if mine_cyclic_not_acyclic:
                last_cyclic_coverage = current_coverage
            else:
                last_acyclic_coverage = current_coverage

            if current_coverage > self.cfg[
                    'saturation'] and batch_previously_found_rules > 1:
                rule_size += 1
                if mine_cyclic_not_acyclic:
                    rule_size_cyclic = rule_size
                if not mine_cyclic_not_acyclic:
                    rule_size_acyclic = rule_size
                print(
                    '========================================================='
                )
                print('=====> increasing rule size of {} rules to {}'.format(
                    str_type, rule_size + 1))
                self.log.info(
                    'increasing rule size of {} rules to {}  after {} s'.
                    format(str_type, rule_size + 1,
                           (current_milli_time() - start_time) // 1000))
                all_useful_rules.append(set())

            mine_cyclic_not_acyclic = not mine_cyclic_not_acyclic
            if mine_cyclic_not_acyclic and rule_size_cyclic + 1 > self.cfg[
                    'max_length_cylic']:
                mine_cyclic_not_acyclic = False
Example #8
    def apply_rules_arx(self, rules, training_set, test_set, validation_set,
                        k):
        print('* applying rules')
        relation_to_rules = self.create_ordered_rule_index(rules)
        print(
            '* set up index structure covering rules for {} different relations'
            .format(len(relation_to_rules)))
        filter_set = TripleSet()
        filter_set.add_triple_set(training_set)
        filter_set.add_triple_set(test_set)
        filter_set.add_triple_set(validation_set)
        print('* constructed filter set with {} triples'.format(
            len(filter_set.triples)))
        if len(filter_set.triples) == 0:
            print('WARNING: using empty filter set!')
        # prepare the data structures used as a cache for questions that are reoccurring
        # start iterating over the test cases
        counter, current_time, start_time = 0, 0, current_milli_time()
        writer = None

        ScoreTree.set_lower_bound(k)
        ScoreTree.set_upper_bound(ScoreTree.lower_bound)
        ScoreTree.set_epsilon(0.0001)

        for triple in test_set.triples:
            if counter % 100 == 0:
                print('* (# {} ) trying to guess the tail/head of {}'.format(
                    counter, triple))
                current_time = current_milli_time()
                print('Elapsed (s) = {}'.format(
                    (current_time - start_time) // 1000))
                start_time = current_milli_time()
            relation = triple.relation
            head = triple.head
            tail = triple.tail
            tail_question, head_question = (relation, head), (relation, tail)
            k_tail_tree = ScoreTree()
            k_head_tree = ScoreTree()

            if relation in relation_to_rules:
                relevant_rules = relation_to_rules.get(relation)
                for rule in relevant_rules:
                    if not k_tail_tree.fine():
                        tail_candidates = rule.compute_tail_results(
                            head, training_set)
                        f_tail_candidates = self.__get_filtered_entities(
                            filter_set, test_set, triple, tail_candidates,
                            True)
                        k_tail_tree.add_values(rule.get_applied_confidence(),
                                               f_tail_candidates)
                    else:
                        break
                for rule in relevant_rules:
                    if not k_head_tree.fine():
                        head_candidates = rule.compute_head_results(
                            tail, training_set)
                        f_head_candidates = self.__get_filtered_entities(
                            filter_set, test_set, triple, head_candidates,
                            False)
                        k_head_tree.add_values(rule.get_applied_confidence(),
                                               f_head_candidates)
                    else:
                        break

            k_tail_candidates, k_head_candidates = {}, {}
            k_tail_tree.get_as_linked_map(k_tail_candidates)
            k_head_tree.get_as_linked_map(k_head_candidates)
            top_k_tail_candidates = self.__sort_by_value(k_tail_candidates, k)
            top_k_head_candidates = self.__sort_by_value(k_head_candidates, k)
            counter += 1
            writer = threading.Thread(
                target=self.__process_write_top_k_candidates,
                args=(
                    triple,
                    test_set,
                    top_k_tail_candidates,
                    top_k_head_candidates,
                ))
            writer.start()
        # wait for the last writer thread (if any test triples were processed)
        if writer is not None:
            writer.join()
        print('* done with rule application')
Example #9
def filter_moments(stim,Y,A,beta,C,m,
    dt          = 1.0,
    oversample  = 10,
    maxrate     = 500,
    maxvcorr    = 2000,
    method      = "moment_closure",
    int_method  = "euler",
    measurement = "moment",
    reg_cov     = 0,
    reg_rate    = 0,
    return_surrogates  = False,
    use_surrogates     = None,
    initial_conditions = None,
    progress           = False,
    safe               = True):
    
    '''
    Parameters
    ----------
    stim : zero-lag effective input (filtered stimulus plus mean offset)
    Y : point-process count observations, same length as stim
    A : forward operator for delay-line evolution
    beta : basis history weights
    C : projection of current state onto the delay line
    m : log-rate bias parameter; log-rates are regularized toward this value
    
    Other Parameters
    ----------------
    dt : time step
    oversample : int
        Integration steps per time step. Should be larger if using 
        Gaussian moment closure, which is stiff. Can be small if using
        second-order approximations, which are less stiff.
    maxrate : 
        maximum rate tolerated
    maxvcorr: 
        Maximum variance correction ('convexity correction' in some literature)
        tolerated during the moment closure.
    method : 
        Moment-closure method. Can be "LNA" for mean-field with linear
        noise approximation, "moment_closure" for Gaussian moment-closure
        on the history process, or "second_order", which discards higher
        moments of the rate which emerge when exponentiating.
    int_method:
        Integration method. Can be either "euler" for forward-Euler, or 
        "exponential", which integrates the locally-linearized system 
        forward using matrix exponentiation (slower).
    measurement:
        "moment", "laplace", or "variational"
    reg_cov:
        Diagonal covariance regularization
    reg_rate:
        Small regularization toward log mean-rate; This parameter reflects
        the precision of a Gaussian prior about the log mean-rate, applied 
        at every measurement update.
    return_surrogates: bool
        If true, Gaussian approximations of measurement likelihoods are
        returned. 
    use_surrogates: None or tuple
        Can be set as tuple of (means, variances) for Gaussian
        approximations of measurement updates.
    initial_conditions: None or tuple
        Can be set to a tuple (M1,M2) of initial conditions for moment
        filtering.
    progress: boolean
        Whether to report progress
    
    Returns
    -------
    allLR : single-time marginal mean of log-rate
    allLV : single-time marginal variance of log-rate
    allM1 : low-dimensional approximation of history process, mean
    allM2 : low-dimensional approximation of history process, covariance
    nll   : negative log-likelihood
    '''
    # check arguments
    stim = asvector(stim)
    Y    = asvector(Y)
    A    = assquare(A)
    if oversample<1:
        raise ValueError('oversample must be a positive integer')
    if method=="moment_closure" and measurement=="variational":
        warnings.warn("There are unresolved numerical stability issues "\
        "when using the log-Gaussian variational update with Gaussian "\
        "moment closure. Suggest using the second-order moment closure "\
        "instead")
    # Precompute constants
    maxlogr   = np.log(maxrate)
    maxratemc = maxvcorr*maxrate
    dtfine    = dt/oversample
    T         = len(stim)
    K         = beta.size
    I         = np.eye(K)
    Cb        = C.dot(beta.T)
    CC        = C.dot(C.T)
    Adt       = A*dtfine
    if use_surrogates is not None:
        MR,VR = use_surrogates
    # Get measurement update function
    measurement = get_measurement(measurement)
    # Build moment integrator functions
    mean_update, cov_update = get_moment_integrator(int_method,Adt)
    # Get update function (computes expected rate from moments)
    update = get_update_function(method,Cb,Adt,maxvcorr)
    # Accumulate negative log-likelihood up to a constant
    nll = 0
    llrescale = 1.0/len(stim)
    if initial_conditions is None:
        # Initial condition for moments
        M1 = np.zeros((K,1))
        M2 = np.eye(K)*1e-6
    else:
        M1,M2 = initial_conditions
    # Store moments
    allM1 = np.zeros((T,K))
    allM2 = np.zeros((T,K,K))
    allLR = np.zeros((T))
    allLV = np.zeros((T))
    allmr = np.zeros((T))
    allvr = np.zeros((T))
    if progress:
        last_shown = current_milli_time()
    for i,s in enumerate(stim):
        # Regularize
        if reg_cov>0:
            strength = reg_cov+max(0,-np.min(np.diag(M2)))
            M2 = 0.5*(M2+M2.T) + strength*np.eye(K) 
        # Integrate moments forward
        for j in range(oversample):
            logv  = beta.T.dot(M2).dot(beta)
            logx  = min(beta.T.dot(M1)+s,maxlogr)
            R0    = sexp(logx)
            R0    = min(maxrate,R0)
            R0   *= dtfine
            Rm,J  = update(logx,logv,R0,M1,M2)
            M2    = cov_update(M2,J) + CC*Rm
            M1    = mean_update(M1)  + C *Rm
            if safe:
                M1    = np.clip(M1,-100,100)
                M2    = np.clip(M2,-100,100)
        # Measurement update
        pM1,pM2 = M1,M2
        if use_surrogates is None:
            # Use specified approximation method to handle non-conjugate
            # log-Gaussian Poisson measurement update
            M1,M2,ll,mr,vr = measurement_update_projected_gaussian(\
                      M1,M2,Y[i],beta,s,dt,m,reg_rate,measurement)
            allmr[i] = mr
            allvr[i] = vr
        else:
            # Use specified Gaussian approximations (MR,VR) to the 
            # measurement likelihoods.
            M1,M2,ll = measurement_update_projected_gaussian_surrogate(\
                      M1,M2,Y[i],beta,s,dt,m,reg_rate,measurement,
                                      return_surrogate=False,
                                      surrogate=(MR[i],VR[i]))
        if safe:
            M1    = np.clip(M1,-100,100)
            M2    = np.clip(M2,-100,100)
        # Store moments
        allM1[i] = M1[:,0].copy()
        allM2[i] = M2.copy()
        allLR[i] = min(beta.T.dot(M1)+s,maxlogr)
        allLV[i] = beta.T.dot(M2).dot(beta)
        nll -= ll*llrescale
        if safe:
            # Heuristic: detect numerical failure and exit early
            failed = np.any(M1 < -1e5)
            failed|= logx>100*maxlogr
            failed|= nll<-1e10
            if failed:
                nll = np.inf
                break
        if progress and current_milli_time()-last_shown>500:
            sys.stdout.write('\r%02.02f%%'%(i*100/T))
            sys.stdout.flush()
            last_shown = current_milli_time()
    if progress:
        sys.stdout.write('\r100.00%')
        sys.stdout.flush()
        sys.stdout.write('\n')
    if return_surrogates:
        return allLR,allLV,allM1,allM2,nll,allmr,allvr
    else:
        return allLR,allLV,allM1,allM2,nll
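A minimal usage sketch for filter_moments (not part of the original source): the shapes and values below are assumptions chosen only to illustrate the call signature and returned moments, with A taken to be a K x K delay-line operator, beta and C as K x 1 vectors, and stim/Y as length-T arrays.
import numpy as np

# Hypothetical problem size and synthetic data, for illustration only.
T, K = 200, 5
rng  = np.random.default_rng(0)
stim = rng.normal(-4.0, 0.5, T)   # zero-lag effective input
Y    = rng.poisson(0.05, T)       # point-process count observations
A    = np.eye(K, k=-1)            # shift operator for the delay line (assumed form)
beta = np.full((K, 1), 0.1)       # history basis weights
C    = np.zeros((K, 1))
C[0] = 1.0                        # project the rate onto the head of the delay line

# "second_order" is the less stiff closure per the docstring, so a modest
# oversample should suffice for this sketch.
allLR, allLV, allM1, allM2, nll = filter_moments(
    stim, Y, A, beta, C, m=-4.0,
    dt=1.0, oversample=10,
    method="second_order", int_method="euler", measurement="moment")
print('negative log-likelihood (rescaled):', nll)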
def minimize_retry(objective,initial,jac=None,hess=None,
                   verbose=False,
                   printerrors=True,
                   failthrough=True,
                   tol=1e-5,
                   simplex_only=False,
                   show_progress=True,
                   **kwargs):
    '''
    Call `scipy.optimize.minimize`, retrying a few times in case
    one solver doesn't work.
    
    This addresses unresolved bugs that can cause exceptions in some of
    the gradient-based solvers in Scipy. If we happen upon these bugs, 
    we can continue optimization using slower but more robust methods.
    
    Ultimately, this routine falls back to the gradient-free Nelder-Mead
    simplex algorithm, although it will try to use faster routines if
    the Hessian and gradient are provided.
    '''
    # Store and track result so we can keep best value, even if it crashes
    result = None
    x0     = np.array(initial).ravel()
    g0     = 1/np.zeros(x0.shape)
    nfeval = 0
    ngeval = 0
    if jac is True:
        v,g  = objective(x0)
    else:
        v    = objective(x0)
    best = v
    # Show progress of the optimization?
    if show_progress:
        sys.stdout.write('\n')
        last_shown = current_milli_time()
    def progress_update():
        nonlocal best, x0, nfeval, ngeval, last_shown
        if show_progress: 
            if current_milli_time() - last_shown > 500:
                ss = np.float128(best).astype(str)
                ss += ' '*(20-len(ss))
                out = '\rNo. function evals %6d \tNo. grad evals %6d \tBest value %s'%(nfeval,ngeval,ss)
                sys.stdout.write(out)
                sys.stdout.flush()
                last_shown = current_milli_time()
    def clear_progress():
        if show_progress: 
            progress_update()
            sys.stdout.write('\n')
            sys.stdout.flush()
    # Wrap the provided gradient and objective functions, so that we can
    # capture the function values as they are being optimized. This way, 
    # if optimization throws an exception, we can still remember the best
    # value it obtained, and resume optimization from there using a slower
    # but more reliable method. These wrapper functions also act as 
    # callbacks and allow us to print the optimization progress on screen.
    if jac is True:
        def wrapped_objective(params):
            nonlocal best, x0, nfeval, ngeval
            v,g = objective(params)
            if np.isfinite(v) and v<best:
                best = v
                x0   = params
            nfeval += 1
            ngeval += 1
            progress_update()
            return v,g
    else:
        def wrapped_objective(params):
            nonlocal best, x0, nfeval
            v = objective(params)
            if np.isfinite(v) and v<best:
                best = v
                x0   = params
            nfeval += 1
            progress_update()
            return v 
    if hasattr(jac, '__call__'):
        # Jacobian is a function
        original_jac = jac
        def wrapped_jacobian(params):
            nonlocal best, x0, nfeval, ngeval
            g = original_jac(params)
            ngeval += 1
            progress_update()
            return g
        jac = wrapped_jacobian
    # There are still some unresolved bugs in some of the optimizers that
    # can lead to exceptions and crashes! This routine catches these errors
    # and fails gracefully. Note that system interrupts are not caught, 
    # and other unexpected errors are caught but reported, in case they
    # reflect an exception arising from a user-provided gradient or 
    # objective function.
    def try_to_optimize(method,validoptions,jac_=None):
        try:
            options = {k:v for (k,v) in kwargs.items() if k in validoptions.split()}
            result = scipy.optimize.minimize(wrapped_objective,x0.copy(),
                jac=jac_,hess=hess,method=method,tol=tol,**options)
            _ = wrapped_objective(result.x)
            clear_progress()
            if result.success: 
                return True
            if verbose or printerrors:
                sys.stderr.write('%s reported "%s"\n'%(method,result.message))
                sys.stderr.flush()
        except (KeyboardInterrupt, SystemExit): 
            # Don't catch system interrupts
            raise
        except (TypeError,NameError):
            # Likely an internal bug in scipy; don't report it
            clear_progress()
            return False
        except Exception:
            # Unexpected error, might be user error, report it
            traceback.print_exc()
            clear_progress()
            if verbose or printerrors:
                sys.stderr.write('Error using minimize with %s:\n'%method)
                sys.stderr.flush()
                traceback.print_exc()
                sys.stderr.flush()
            return False
        return False
    # We try a few different optimizers, in order
    # -- If Hessian is available, Newton-CG should be fast! try it
    # -- Otherwise, BFGS is a fast gradient-only optimizer
    # -- Fall back to Nelder-Mead simplex algorithm if all else fails
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",message='Method Nelder-Mead does not use')
            warnings.filterwarnings("ignore",message='Method BFGS does not use')
            # If gradient is provided....
            if jac is not None and jac is not False and not simplex_only:
                if try_to_optimize('Newton-CG','disp xtol maxiter eps',jac_=jac):
                    return x0
                if try_to_optimize('BFGS','disp gtol maxiter eps norm',jac_=jac):
                    return x0
            # Without gradient...
            if not simplex_only:
                if try_to_optimize('BFGS','disp gtol maxiter eps norm',\
                    jac_=True if jac is True else None):
                    return x0
            # Simplex is last resort, slower but robust
            if try_to_optimize('Nelder-Mead',
                    'disp maxiter maxfev initial_simplex xatol fatol',
                    jac_=True if jac is True else None):
                return x0
    except (KeyboardInterrupt, SystemExit):
        print('Best parameters are %s with value %s'%(v2str_long(x0),best))
        raise
    except Exception:
        traceback.print_exc()
        if not failthrough: raise
    # If we've reached here, it means that all optimizers terminated with
    # an error, or reported a failure to converge. If `failthrough` is 
    # set, we can still return the best value found so far. 
    if failthrough:
        if verbose:
            sys.stderr.write('Minimization may not have converged\n')
            sys.stderr.flush()
        return x0 # fail through
    raise ArithmeticError('All minimization attempts failed')
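A minimal usage sketch for minimize_retry (an illustration under assumptions, not from the original source): a toy quadratic is minimized first with jac=True, so the objective returns a (value, gradient) pair and the gradient-based solvers are tried before the simplex fallback, and then gradient-free with simplex_only=True to force Nelder-Mead directly.
import numpy as np

def quadratic_with_grad(x):
    # toy objective returning (value, gradient), as expected when jac=True
    v = float(np.sum((x - 3.0) ** 2))
    g = 2.0 * (x - 3.0)
    return v, g

# gradient-based path: Newton-CG/BFGS are attempted before the simplex fallback
x_best = minimize_retry(quadratic_with_grad, np.zeros(4),
                        jac=True, show_progress=False)

# gradient-free path: skip straight to the Nelder-Mead fallback
x_best_simplex = minimize_retry(lambda x: float(np.sum((x - 3.0) ** 2)),
                                np.zeros(4), jac=None,
                                simplex_only=True, show_progress=False)
print(x_best, x_best_simplex)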