def predict_splunk_search(self, search_string, X_fields, Y_field, output_field):
    '''
    Build a splunk search string that writes a predicted class into output_field.

    search_string: splunk search text placed after the leading 'search'
    X_fields: feature fields; one-hot encoded through sm.to_one_hot using
        self.feature_onehot_mapping and self.onehot_ordering
    Y_field: not used by this method
    output_field: field name handed to sm.case_mapping as the target field

    returns: 'search <search_string> | ' followed by the splunk_concat of the
        argmax expression string and the case-mapping string
    '''
    search_prefix = 'search %s | ' % search_string

    # per-class score = onehot(features) . log_prob_suff_stats^T + log priors
    onehot_features = sm.to_one_hot(
        X_fields,
        onehot_mapping=self.feature_onehot_mapping,
        ordering=self.onehot_ordering)
    log_prob_table = sm.array(self.log_prob_suff_stats)
    likelihood_scores = sm.dot(onehot_features, log_prob_table.T())
    log_priors = sm.array(self.log_prob_priors)
    class_scores = sm.add(likelihood_scores, log_priors)

    # index of the best-scoring class
    best_class = sm.argmax(class_scores)
    best_class.rename('argmax_prob')

    # after the rename the index is read from the field argmax_prob_0_0;
    # self.class_mapping is used to turn that index into the output value
    mapping_clause = sm.case_mapping(self.class_mapping, 'argmax_prob_0_0', output_field)

    return search_prefix + splunk_concat(best_class.string, mapping_clause)
def predict_splunk_search(self, search_string, X_fields, Y_field, output_field):
    '''
    Use splunkmath to build a search string that predicts a class for every
    event matched by the splunk search.

    search_string: splunk search text placed after the leading 'search'
    X_fields: feature fields, wrapped with sm.array
    Y_field: not used by this method
    output_field: name of the field assigned by the final eval/case clause

    returns: splunk_concat of the base search, the score expression string,
        the argmax result and an 'eval <output_field> = case(...)' clause
    '''
    base_search = 'search %s' % search_string

    features = sm.array(X_fields)
    means = sm.array(self.feature_averages)
    variances = sm.array(self.feature_variances)
    log_priors = sm.array(self.log_prob_priors)

    # exponent part of the per-feature gaussian log density:
    # -0.5 * (x - mean)^2 / variance
    centered = sm.sub(features, means)
    squared = sm.pow(centered, 2)
    exponent_terms = sm.div(squared, variances)
    exponent_terms = sm.mul(exponent_terms, -.5)

    # normalising part: -0.5 * ln(2 * pi * variance)
    doubled_variances = sm.mul(variances, 2)
    two_pi_variances = sm.mul(doubled_variances, np.pi)
    norm_terms = sm.mul(sm.ln(two_pi_variances), -.5)

    # sum the per-feature terms along axis 1, then add the log priors
    per_feature_scores = sm.add(norm_terms, exponent_terms)
    per_class_scores = sm.sum(per_feature_scores, axis=1)
    class_scores = sm.add(log_priors, per_class_scores)
    class_scores.rename('final_probabilities')

    # NOTE(review): the sm.argmax result is handed to splunk_concat as-is,
    # while class_scores contributes its `.string` -- confirm sm.argmax
    # returns something splunk_concat accepts here.
    best_class = sm.argmax(class_scores)

    pieces = splunk_concat(base_search, class_scores.string)
    pieces = splunk_concat(pieces, best_class)

    # map the field argmax_final_probabilities (presumably what sm.argmax
    # produces for the renamed array -- confirm) onto the class index 0,1,2...
    # this should be decomposed later
    case_clauses = ','.join(
        'argmax_final_probabilities=%s, %s' % (class_index, class_index)
        for class_index in range(self.num_classes))
    eval_clause = 'eval %s = case(%s)' % (output_field, case_clauses)

    return splunk_concat(pieces, eval_clause)
def predict_splunk_search(self, search_string, feature_fields, class_field, output_field):
    '''
    Build a search string that populates events with a new prediction field.

    search_string: splunk search text placed after the leading 'search'
    feature_fields: feature fields, wrapped with sm.array; their count sets
        the exponent of the pi term
    class_field: not used by this method
    output_field: field name handed to sm.case_mapping as the target field

    returns: 'search <search_string> | ' followed by the splunk_concat of the
        argmax expression string and the case-mapping string
    '''
    # 1: search the right events
    search_prefix = 'search %s | ' % search_string

    # 2: feature vector minus the stored sufficient statistics
    # TODO: passing self.sufficient_statistics in as a vector, rather than
    # first going through a splunk string, would be better
    feature_vec = sm.array(feature_fields)
    class_stats = sm.array(self.sufficient_statistics)
    mean_diff = sm.sub(feature_vec, class_stats)

    # 3: exponent term of the multivariate gaussian pdf:
    # diag(mean_diff . inv_cov . mean_diff^T), scaled by -0.5
    # (original author's shape notes: cxn * nxn -> cxn, then cxn * nxc -> cxc)
    inv_cov = sm.array(self.inv_cov_matrix)
    diff_dot_inv_cov = sm.dot(mean_diff, inv_cov)
    quadratic_form = sm.dot(diff_dot_inv_cov, mean_diff.T())
    diagonal_terms = sm.diag(quadratic_form)
    scaled_exponents = sm.mul(diagonal_terms, -.5)

    # ln of 1 / (cov_det_root * pi^(n/2)), n = number of feature fields
    pi_factor = np.pi ** (len(feature_fields) / 2.0)
    log_norm = sm.ln(sm.array(1 / (self.cov_det_root * pi_factor)))
    log_priors = sm.array(self.log_prob_priors)

    # NOTE(review): log_norm is subtracted here and is built from pi rather
    # than 2*pi; a gaussian log density would normally add ln(1/(...)) with
    # (2*pi)^(n/2). The term looks identical for every class, so presumably
    # the argmax is unaffected -- confirm before relying on the 'prob' values.
    # splunk vector broadcasting takes care of the rest
    class_scores = sm.add(sm.sub(log_priors, log_norm), scaled_exponents)
    class_scores.rename('prob')

    # argmax splunkarray
    best_class = sm.argmax(class_scores)
    best_class.rename('argmax_prob')

    # after the rename the index is read from the field argmax_prob_0_0;
    # self.class_mapping is used to turn that index into the output value
    mapping_clause = sm.case_mapping(self.class_mapping, 'argmax_prob_0_0', output_field)

    # the earlier hard-coded two-class if(prob_0_0>prob_0_1, ...) eval is
    # superseded by the case mapping above
    return search_prefix + splunk_concat(best_class.string, mapping_clause)
def predict_splunk_search(self, search_string, X_fields, Y_field, output_field):
    '''
    Use splunkmath to build a search string that predicts a class for every
    event matched by the splunk search.

    search_string: splunk search text placed after the leading 'search'
    X_fields: feature fields, wrapped with sm.array
    Y_field: not used by this method
    output_field: name of the field assigned by the final eval/case clause

    returns: splunk_concat of the base search, the score expression string,
        the argmax result and an 'eval <output_field> = case(...)' clause
    '''
    query = 'search %s' % search_string

    x_vec = sm.array(X_fields)
    mean_vec = sm.array(self.feature_averages)
    var_vec = sm.array(self.feature_variances)
    prior_vec = sm.array(self.log_prob_priors)

    # -0.5 * (x - mean)^2 / variance, built one operation at a time
    deviation = sm.sub(x_vec, mean_vec)
    deviation_sq = sm.pow(deviation, 2)
    exp_part = sm.mul(sm.div(deviation_sq, var_vec), -.5)

    # -0.5 * ln(2 * pi * variance)
    scaled_var = sm.mul(sm.mul(var_vec, 2), np.pi)
    pi_part = sm.mul(sm.ln(scaled_var), -.5)

    # per-feature terms summed along axis 1, plus the log priors
    feature_scores = sm.add(pi_part, exp_part)
    summed_scores = sm.sum(feature_scores, axis=1)
    posterior = sm.add(prior_vec, summed_scores)
    posterior.rename('final_probabilities')

    # NOTE(review): the sm.argmax result is handed to splunk_concat as-is,
    # while posterior contributes its `.string` -- confirm sm.argmax returns
    # something splunk_concat accepts here.
    argmax_part = sm.argmax(posterior)

    query = splunk_concat(query, posterior.string)
    query = splunk_concat(query, argmax_part)

    # map the field argmax_final_probabilities (presumably what sm.argmax
    # produces for the renamed array -- confirm) onto the class index 0,1,2...
    # this should be decomposed later
    clause_template = 'argmax_final_probabilities=%s, %s'
    clauses = ','.join(clause_template % (idx, idx) for idx in range(self.num_classes))
    case_eval = 'eval %s = case(%s)' % (output_field, clauses)

    return splunk_concat(query, case_eval)