def test_df(sample):
    from g4l.context_tree import ContextTree
    from g4l.util.degrees_of_freedom import degrees_of_freedom as df

    tree = ContextTree.init_from_sample(sample)
    assert len(df('g4l', tree)) == 53  # nodes have different dfs
    assert df('perl', tree) == -1
    assert df('ct06', tree) == -0.5
def load_cache(estimator, X):
    """
    Loads previously estimated context trees from file.

    The result folder is unique for each set of SMC parameters
    (epsilon and penalty interval) and sample.

    Parameters
    ----------
    estimator : g4l.estimators.SMC
        The estimator whose parameters and champion trees are
        restored from the cache
    X : g4l.data.Sample
        A sample

    Returns
    -------
    bool
        True if a cache entry was found and loaded, False otherwise.
    """
    cache_folder, cachefile = cache_file(estimator, X)
    try:
        with open(cachefile, 'rb') as f:
            dic = pickle.load(f)
        print("Loaded from cache")
    except FileNotFoundError:
        return False
    estimator.max_depth = dic['max_depth']
    estimator.penalty_interval = dic['penalty_interval']
    estimator.epsilon = dic['epsilon']
    estimator.cache_dir = dic['cache_dir']
    estimator.thresholds = dic['thresholds']
    estimator.context_trees = []
    for i in range(dic['context_trees']):
        t = ContextTree.load_from_file('%s/%s.tree' % (cache_folder, i))
        estimator.context_trees.append(t)
    return True
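For reference, a write-side counterpart would persist exactly the fields that load_cache reads back. The sketch below is illustrative only: save_cache is not part of the code above, and it assumes ContextTree exposes a save() method for writing a tree to disk.

def save_cache(estimator, X):
    # Hypothetical counterpart to load_cache (sketch only): stores the same
    # parameters plus the estimator's champion trees under the cache folder.
    cache_folder, cachefile = cache_file(estimator, X)
    os.makedirs(cache_folder, exist_ok=True)
    for i, t in enumerate(estimator.context_trees):
        t.save('%s/%s.tree' % (cache_folder, i))  # assumes ContextTree.save()
    dic = {'max_depth': estimator.max_depth,
           'penalty_interval': estimator.penalty_interval,
           'epsilon': estimator.epsilon,
           'cache_dir': estimator.cache_dir,
           'thresholds': estimator.thresholds,
           'context_trees': len(estimator.context_trees)}
    with open(cachefile, 'wb') as f:
        pickle.dump(dic, f)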
def calculate_likelihoods(champion_trees_folder, resamples_file,
                          resample_size, subsamples_separator=None,
                          num_cores=0):
    listfiles = list_files(champion_trees_folder)
    champion_trees = [ContextTree.load_from_file(f) for f in listfiles]
    num_resamples = len(resamples(resamples_file))
    num_trees = len(champion_trees)
    pr = list(product(range(num_trees), range(num_resamples)))
    params = [(champion_trees_folder, resamples_file, resample_size,
               i, j, subsamples_separator)
              for i, j in pr]
    if num_cores is None:
        num_cores = 0
    if num_cores > 1:
        with Pool(num_cores) as p:
            result = list(tqdm.tqdm(p.imap(calc_likelihood_process, params),
                                    total=len(params)))
    else:
        result = list(tqdm.tqdm(map(calc_likelihood_process, params),
                                total=len(params)))
    return np.reshape(result, (num_trees, num_resamples))
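A minimal call might look like the following; the paths, sizes, and core count are placeholders, not values taken from the project.

# L[i, j] holds the value returned by calc_likelihood_process for
# champion tree i and resample j.
L = calculate_likelihoods('results/champion_trees',
                          'results/resamples.txt',
                          resample_size=80000,
                          num_cores=4)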
def build(self):
    from g4l.context_tree import ContextTree

    # If no empty (root) context is present, add one whose transition
    # frequencies are the summed counts of the length-1 contexts.
    if len([c for c, f in self.contexts if c == '']) == 0:
        empty_tr_freq_dic = dict([(ctx, sum(ps))
                                  for ctx, ps in self.contexts
                                  if len(ctx) == 1])
        empty_tr_freq_dic = defaultdict(int, empty_tr_freq_dic)
        empty_tr_freq = [empty_tr_freq_dic[x] for x in self.A]
        self.contexts.append(('', empty_tr_freq))
    df = self._build_contexts_dataframe()
    probs = self._build_transition_probs()
    max_depth = df.node.str.len().max()
    return ContextTree(max_depth, df, probs)
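To make the empty-context step concrete, here is a small worked example with made-up counts for a binary alphabet; it only illustrates how the root row appended in build() is derived.

# Made-up input: (context, per-symbol transition counts) pairs and alphabet.
contexts = [('0', [10, 5]), ('1', [7, 8]), ('00', [4, 2])]
A = ['0', '1']

# Only length-1 contexts contribute to the root: {'0': 15, '1': 15},
# so the row appended for the empty context is ('', [15, 15]).
empty = dict((ctx, sum(ps)) for ctx, ps in contexts if len(ctx) == 1)
assert [empty[x] for x in A] == [15, 15]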
def create_samples(args):
    tree = ContextTree.load_from_file(args.tree.name)
    x = tree.generate_sample(args.size) + '\n'
    fl = os.path.abspath(args.output_file.name)
    with open(fl, 'w') as f:
        f.write(x)
    logging.info("Sample '%s' generated" % fl)
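create_samples expects an argparse-style namespace with open file handles; the wiring below is a sketch with hypothetical flag names, shown only to make the expected attributes explicit.

import argparse

# Hypothetical CLI wiring: args.tree and args.output_file must be open file
# objects (so that .name is available); args.size is the sample length.
parser = argparse.ArgumentParser()
parser.add_argument('-t', '--tree', type=argparse.FileType('r'))
parser.add_argument('-s', '--size', type=int, default=10000)
parser.add_argument('-o', '--output_file', type=argparse.FileType('w'))
create_samples(parser.parse_args())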
def load_trees(self):
    champion_trees_folder = os.path.join(self.folder, 'champion_trees')
    self.champion_trees = []
    for f in sorted(glob.glob(champion_trees_folder + "/*.tree")):
        ct = ContextTree.load_from_file(f)
        self.champion_trees.append(ct)
def optimal_tree(self):
    f = os.path.join(self.folder, 'optimal.tree')
    return ContextTree.load_from_file(f)
def get_tree(trees_folder, idx):
    return ContextTree.load_from_file(list_files(trees_folder)[idx])