def __init__(self, model, data, link=IdentityLink(), **kwargs):
    """Standardize inputs and initialize the explainer's sampling state.

    Parameters
    ----------
    model : object
        Model to explain; wrapped via ``convert_to_model``.
    data : array-like
        Background (representative) data; wrapped via ``convert_to_data``.
    link : Link, optional
        Link function between model output and explanation space.
    **kwargs :
        weights : array-like of length N, optional
            Per-sample weights for the background data (default: uniform).
    """
    # convert incoming inputs to standardized iml objects
    self.link = convert_to_link(link)
    self.model = convert_to_model(model)
    self.data = convert_to_data(data)
    match_model_to_data(self.model, self.data)

    # enforce our current input type limitations
    assert isinstance(
        self.data, DenseData
    ), "Shap explainer only supports the DenseData input currently."
    assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

    # init our parameters
    self.N = self.data.data.shape[0]
    self.P = self.data.data.shape[1]

    # TODO: Use these weights!
    # FIX: coerce to a float ndarray before normalizing -- the original
    # in-place "/=" raised TypeError for integer arrays and plain lists --
    # and validate the length BEFORE the values are used.
    self.weights = np.asarray(kwargs.get("weights", np.ones(self.N)), dtype=np.float64)
    assert len(
        self.weights
    ) == self.N, "Provided 'weights' must match the number of representative data points {0}!".format(
        self.N)
    self.weights = self.weights / np.sum(self.weights)

    self.linkfv = np.vectorize(self.link.f)
    self.nsamplesAdded = 0
    self.nsamplesRun = 0
def __init__(self, model, data, link=IdentityLink(), **kwargs):
    """Wrap the model/data/link inputs in standardized iml objects and
    set up the bookkeeping used by the sampling procedure."""
    # Standardize every incoming input.
    self.link = convert_to_link(link)
    self.model = convert_to_model(model)
    self.keep_index = kwargs.get("keep_index", False)
    self.data = convert_to_data(data, keep_index=self.keep_index)
    match_model_to_data(self.model, self.data)

    # Current implementation only handles non-transposed DenseData.
    assert isinstance(
        self.data, DenseData
    ), "Shap explainer only supports the DenseData input currently."
    assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

    # A large background set slows every explanation; suggest summarizing it.
    n_background = len(self.data.weights)
    if n_background > 100:
        log.warning(
            "Using " + str(n_background) + " background data samples could cause " +
            "slower run times. Consider using shap.kmeans(data, K) to summarize the background " +
            "as K weighted samples.")

    # Sampling-state parameters.
    self.N = self.data.data.shape[0]
    self.P = self.data.data.shape[1]
    self.linkfv = np.vectorize(self.link.f)
    self.nsamplesAdded = 0
    self.nsamplesRun = 0
def __init__(self, model, data, link=IdentityLink(), **kwargs):
    """Standardize the inputs, warn on oversized background sets, and
    precompute the weighted expected model output E_x[f(x)]."""
    # Standardize every incoming input.
    self.link = convert_to_link(link)
    self.model = convert_to_model(model)
    self.keep_index = kwargs.get("keep_index", False)
    self.keep_index_ordered = kwargs.get("keep_index_ordered", False)
    self.data = convert_to_data(data, keep_index=self.keep_index)
    match_model_to_data(self.model, self.data)

    # Current implementation only handles non-transposed DenseData.
    assert isinstance(
        self.data, DenseData
    ), "Shap explainer only supports the DenseData input currently."
    assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

    # A large background set slows every explanation; suggest summarizing it.
    n_background = len(self.data.weights)
    if n_background > 100:
        log.warning(
            "Using " + str(n_background) + " background data samples could cause " +
            "slower run times. Consider using shap.kmeans(data, K) to summarize the background " +
            "as K weighted samples.")

    # Sampling-state parameters.
    self.N = self.data.data.shape[0]
    self.P = self.data.data.shape[1]
    self.linkfv = np.vectorize(self.link.f)
    self.nsamplesAdded = 0
    self.nsamplesRun = 0

    # find E_x[f(x)] over the (weighted) background data
    if self.keep_index:
        null_output = self.model.f(self.data.convert_to_df())
    else:
        null_output = self.model.f(self.data.data)
    if isinstance(null_output, (pd.DataFrame, pd.Series)):
        null_output = np.squeeze(null_output.values)
    self.fnull = np.sum((null_output.T * self.data.weights).T, 0)
    self.expected_value = self.fnull

    # Detect whether the model produces a vector output; normalize the
    # scalar case to a length-1 array so downstream code is uniform.
    self.vector_out = len(self.fnull.shape) != 0
    if self.vector_out:
        self.D = self.fnull.shape[0]
    else:
        self.fnull = np.array([self.fnull])
        self.D = 1
def __init__(self, model, data, link=IdentityLink(), **kwargs):
    """Wrap the model/data/link inputs in standardized iml objects and
    initialize the sampling counters."""
    # Standardize every incoming input.
    self.link = convert_to_link(link)
    self.model = convert_to_model(model)
    self.data = convert_to_data(data)
    match_model_to_data(self.model, self.data)

    # Current implementation only handles non-transposed DenseData.
    assert isinstance(self.data, DenseData), "Shap explainer only supports the DenseData input currently."
    assert not self.data.transposed, "Shap explainer does not support transposed DenseData currently."

    # Sampling-state parameters.
    self.N = self.data.data.shape[0]
    self.P = self.data.data.shape[1]
    self.linkfv = np.vectorize(self.link.f)
    self.nsamplesAdded = 0
    self.nsamplesRun = 0