def test_cache_historical_state_using(data_setup):
    """Check that states saved by an earlier fit stay usable in the cache.

    A child of the root node is replaced and later restored; once restored,
    the root node must be found in the cache again without a new fit.
    """
    cache = OperationsCache()
    train_data, _ = data_setup
    chain = chain_first()

    # First fit: the fitted models of the chain are stored in the cache.
    chain.fit(input_data=train_data)
    cache.save_chain(chain)

    replacement = SecondaryNode(operation_type='logit')
    original_child = chain.root_node.nodes_from[0]

    # Swap the first child of the root for the new node.
    chain.update_node(old_node=original_child, new_node=replacement)

    # The cached state of the root is no longer actual.
    root_state = cache.get(chain.root_node)
    assert not root_state

    # Fit the modified chain and store it as well.
    chain.fit(input_data=train_data)
    cache.save_chain(chain)

    # Now the cache is actual for the modified chain.
    root_state = cache.get(chain.root_node)
    assert root_state

    # Put the original child back in place.
    current_child = chain.root_node.nodes_from[0]
    chain.update_node(old_node=current_child, new_node=original_child)

    # No refit is required here: the matching model was cached
    # after the very first fit.
    root_state = cache.get(chain.root_node)
    assert root_state
def test_cache_actuality_after_model_change(data_setup):
    """Nodes not affected by a model change keep an actual cache."""
    cache = OperationsCache()
    chain = chain_first()
    train_data, _ = data_setup

    chain.fit(input_data=train_data)
    cache.save_chain(chain)

    # Replace the first child of the root with a freshly created node.
    replacement = SecondaryNode(operation_type='logit')
    chain.update_node(old_node=chain.root_node.nodes_from[0],
                      new_node=replacement)

    changed_child = chain.root_node.nodes_from[0]
    stale_nodes = [chain.root_node, changed_child]
    fresh_nodes = [node for node in chain.nodes if node not in stale_nodes]

    # Every non-affected node is still present in the cache.
    fresh_hits = [cache.get(node) is not None for node in fresh_nodes]
    assert all(fresh_hits)

    # The replaced node and the root that depends on it have no actual cache.
    stale_misses = [cache.get(node) is None for node in stale_nodes]
    assert all(stale_misses)
def fit_from_cache(self, cache: OperationsCache):
    """Restore the fitted operation of every node from ``cache``.

    For each node in ``self.nodes`` the state returned by ``cache.get(node)``
    is inspected: when it is truthy, its ``operation`` attribute becomes the
    node's ``fitted_operation``; otherwise ``fitted_operation`` is reset to
    ``None``.
    """
    for node in self.nodes:
        state = cache.get(node)
        node.fitted_operation = state.operation if state else None
def test_multi_chain_caching_with_cache(data_setup):
    """Check that a cache filled from other chains serves the identical parts of a new chain."""
    train_data, _ = data_setup

    cache = OperationsCache()
    main_chain = chain_second()
    donor_chain = chain_first()

    # Fit the donor chain, which contains parts identical to main_chain.
    donor_chain.fit(input_data=train_data)
    cache.save_chain(donor_chain)

    root = main_chain.root_node
    first_parent = root.nodes_from[0]
    stale_nodes = [root, first_parent] + list(first_parent.nodes_from)
    fresh_nodes = [node for node in main_chain.nodes
                   if node not in stale_nodes]

    # The parts shared with the donor chain are found in the cache
    # even though main_chain.fit() was never called.
    fresh_states = [cache.get(node) for node in fresh_nodes]
    assert all(fresh_states)

    # The non-identical parts are still missing from the cache.
    stale_states = [cache.get(node) for node in stale_nodes]
    assert not any(stale_states)

    # Repeat the same scenario with another set of chains.
    cache = OperationsCache()
    main_chain = chain_fourth()
    prev_chain_first = chain_third()
    prev_chain_second = chain_fifth()

    for donor in (prev_chain_first, prev_chain_second):
        donor.fit(input_data=train_data)
        cache.save_chain(donor)

    root = main_chain.root_node
    stale_nodes = [root, root.nodes_from[1]]
    fresh_nodes = list(root.nodes_from[0].nodes_from)

    stale_states = [cache.get(node) for node in stale_nodes]
    assert not any(stale_states)

    fresh_states = [cache.get(node) for node in fresh_nodes]
    assert all(fresh_states)
def test_cache_actuality_after_subtree_change_to_identical(data_setup):
    """Non-affected nodes keep an actual cache after a subtree is swapped
    for another pre-fitted subtree."""
    cache = OperationsCache()
    train_data, _ = data_setup
    pipeline = pipeline_first()
    donor_pipeline = pipeline_second()

    pipeline.fit(input_data=train_data)
    cache.save_pipeline(pipeline)

    # Fit the donor pipeline and cache only the subtree that will be moved.
    donor_pipeline.fit(input_data=train_data)
    donor_subtree = Pipeline(donor_pipeline.root_node.nodes_from[0])
    cache.save_pipeline(donor_subtree)

    pipeline.update_subtree(pipeline.root_node.nodes_from[0],
                            donor_pipeline.root_node.nodes_from[0])

    excluded = [pipeline.root_node]
    fresh_nodes = [node for node in pipeline.nodes if node not in excluded]

    # Untouched nodes of the initial pipeline and the fitted nodes of the
    # new subtree are all present in the cache.
    fresh_hits = [cache.get(node) is not None for node in fresh_nodes]
    assert all(fresh_hits)

    # The affected root node has no actual cache.
    root_state = cache.get(pipeline.root_node)
    assert root_state is None
def test_cache_actuality_after_primary_node_changed_to_subtree(data_setup):
    """Non-affected nodes keep an actual cache after a primary node is
    replaced by a pre-fitted subtree."""
    cache = OperationsCache()
    train_data, _ = data_setup
    pipeline = pipeline_first()
    donor_pipeline = pipeline_second()

    pipeline.fit(input_data=train_data)
    cache.save_pipeline(pipeline)

    donor_pipeline.fit(input_data=train_data)

    # Replace a node two levels below the root with the donor subtree,
    # then cache the donor subtree.
    replaced_node = pipeline.root_node.nodes_from[0].nodes_from[0]
    pipeline.update_subtree(replaced_node,
                            donor_pipeline.root_node.nodes_from[0])
    donor_subtree = Pipeline(donor_pipeline.root_node.nodes_from[0])
    cache.save_pipeline(donor_subtree)

    first_parent = pipeline.root_node.nodes_from[0]
    stale_nodes = [pipeline.root_node, first_parent]
    fresh_nodes = [node for node in pipeline.nodes
                   if node not in stale_nodes]

    # Untouched nodes of the initial pipeline and the fitted nodes of the
    # new subtree are all present in the cache.
    fresh_states = [cache.get(node) for node in fresh_nodes]
    assert all(fresh_states)

    # The root and its first parent, both affected by the change,
    # have no actual cache.
    stale_states = [cache.get(node) for node in stale_nodes]
    assert not any(stale_states)