#!/usr/bin/env python import json from algos.LogisticRegression import LogisticRegression from sklearn.linear_model import LogisticRegression as _LogisticRegression from util.param_util import convert_params, is_truthy from codec import codecs_manager from cexc import get_messages_logger messages = get_messages_logger() class MultinomialLogisticRegression(LogisticRegression): def __init__(self, options): self.handle_options(options) out_params = convert_params( options.get('params', {}), bools=['fit_intercept', 'probabilities'], strs=['solver', 'multi_class', 'class_weight']) # Solver if 'solver' in out_params: if out_params['solver'] not in ['newton-cg', 'lbfgs']: raise RuntimeError('solver must be either: newton-cg or lbfgs') else: # default out_params['solver'] = 'newton-cg' # Multiclass
# Copyright (C) 2015-2017 Splunk Inc. All Rights Reserved.
import errno
import gc
import os

import pandas as pd

import cexc
import conf
import models.base
from BaseProcessor import BaseProcessor
from util import search_util

logger = cexc.get_logger(__name__)
messages = cexc.get_messages_logger()


class ApplyProcessor(BaseProcessor):
    """Apply-stage processor; its input and output are pandas DataFrames."""

    def __init__(self, process_options, searchinfo):
        """Prepare the model and its resource limits for an apply run.

        Args:
            process_options (dict): process options
            searchinfo (dict): information required for search
        """
        self.searchinfo = searchinfo

        # setup_model hands back four items, in this order: the algorithm
        # name, the algorithm object, the process options to keep on the
        # instance, and the namespace.
        model_setup = self.setup_model(process_options, self.searchinfo)
        (
            self.algo_name,
            self.algo,
            self.process_options,
            self.namespace,
        ) = model_setup

        # Resource limits are looked up by algorithm name, so this has to
        # happen after the unpacking above.
        self.resource_limits = self.load_resource_limits(self.algo_name)
#!/usr/bin/env python from base import BaseAlgo, TransformerMixin from codec import codecs_manager from util.param_util import convert_params import pandas as pd import networkx as nx from cexc import get_messages_logger, get_logger debug = get_messages_logger() class GraphLabelPropagation(TransformerMixin, BaseAlgo): def __init__(self, options): debug.info('NetworkX Version {}'.format(nx.__version__)) self.handle_options(options) # call same logic as in fit def apply(self, df, options): return self.fit(df, options) # compute connected components def fit(self, df, options): # Make a copy of data, to not alter original dataframe X = df.copy() # create a directed graph graph = nx.Graph() src_dest_name = self.feature_variables dfg = X[src_dest_name] for index, row in dfg.iterrows(): graph.add_edge(row[src_dest_name[0]], row[src_dest_name[1]]) #, value=row['value'])