Example #1
# Required imports (the standard data-science stack; "Logger" is assumed to be
# a project-local utility importable in this package)
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

class Data:

    packageName = "com.brodagroup.machinelearning.common.Data"

    logger = None

    # Initializer
    def __init__(self):
        self.logger = Logger(self.packageName).getLogger()
        self.parameters = []
        return

    def configure(self):
        return 0

    def load(self, pathCSV):
        # dataframe = pd.read_csv(pathCSV, quotechar='"', skipinitialspace=True)
        dataframe = pd.read_csv(pathCSV)
        return dataframe

    def loadDataFrame(self, pathPKL):
        dataframe = pd.read_pickle(pathPKL)
        return dataframe

    def saveDataFrame(self, dataframe, pathPKL):
        dataframe.to_pickle(pathPKL)
        return

    def segment(self, features, target, totalPct, testingPct, randomState):

        # Use only a small subset of the rows to reduce run-time while
        # testing hypotheses
        numRows, numFeatures = features.shape
        rowsUsed = int(numRows * totalPct)

        xfeatures = features[0:rowsUsed]
        xtarget = target[0:rowsUsed]

        # Note:  features represents "X" and target represents "y"
        X_train, X_test, y_train, y_test = train_test_split(
            xfeatures, xtarget, test_size=testingPct, random_state=randomState
        )

        return (X_train, X_test, y_train, y_test)

    def shuffle(self, dataset):
        # np.random.shuffle shuffles in place along the first axis and
        # returns None, so this method has no return value
        np.random.shuffle(dataset)
        return

    def sample(self, dataset, count):
        sample = np.random.choice(dataset.index.values, count)
        return sample

    def normalize(self, df, scaleType="std"):

        self.logger.info("Normalizing data, type: {0}".format(scaleType))

        cols = df.columns.values

        inArray = df[cols].values
        outArray = None

        if scaleType == "minmax":
            # minmax_scale = preprocessing.MinMaxScaler().fit(df[cols])
            minmax_scale = preprocessing.MinMaxScaler().fit(inArray)
            outArray = minmax_scale.transform(inArray)
        else:
            # std_scale = preprocessing.StandardScaler().fit(df[cols])
            std_scale = preprocessing.StandardScaler().fit(inArray)
            outArray = std_scale.transform(inArray)

        # Preserve the original index so rows stay aligned with any target
        df = pd.DataFrame(data=outArray, columns=cols, index=df.index)

        return df

    def threshold(self, X, lower, lvalue, upper, uvalue):
        X[X <= lower] = lvalue
        X[X >= upper] = uvalue
        return X

    def join(self, leftDF, rightDF, onKeys):
        result = pd.merge(leftDF, rightDF, on=onKeys)
        return result

    def categorize(self, df, field):
        self.logger.debug("Categorizing field: {0}, type: {1}".format(field, df[field].dtype))

        # Create and fill new columns for the categorized field
        if df[field].dtype == "object":

            values = list(pd.Series(df[field].values.ravel()).unique())

            self.logger.debug("Field: {0}, has value count: {1}".format(field, len(values)))

            if len(values) > 2:

                for value in values:
                    # Create the new field name based upon original name and values
                    # Note -- take into account missing values
                    if pd.isnull(value):
                        xfield = field + "-" + "Missing"
                    else:
                        # Strip commas
                        xvalue = value.replace(",", "")
                        xfield = field + "-" + xvalue

                    # Create and fill in the new columns with values
                    df.loc[:, xfield] = -1
                    self.logger.debug("Creating field: {0}, type: {1}".format(xfield, df[xfield].dtype))
                    # df[xfield] = df[field].apply(lambda x: 1 if x == value else 0)
                    df.loc[:, xfield] = df.loc[:, field].apply(lambda x: 1 if x == value else 0)

                # Remove the original field
                self.logger.debug("Dropping field: {0}, type: {1}".format(field, df[field].dtype))
                df = df.drop(field, axis=1)

        return df
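
    # Example of categorize on hypothetical data: an object column 'Color'
    # with values {'Red', 'Blue', NaN} expands into indicator columns
    # 'Color-Red', 'Color-Blue' and 'Color-Missing', after which the
    # original 'Color' column is dropped.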

    def sync(self, dfA, dfB):

        self.logger.debug("Synchronizing...")

        listA = list(dfA.columns.values)
        self.logger.debug("DataFrame A, columns: {0}".format(listA))

        listB = list(dfB.columns.values)
        self.logger.debug("DataFrame B, columns: {0}".format(listB))

        setA = set(listA)
        setB = set(listB)

        columnsNotInB = setA.difference(setB)
        self.logger.debug("Columns in A but not in B: {0}".format(columnsNotInB))

        for column in columnsNotInB:
            dfB[column] = 0

        columnsNotInA = setB.difference(setA)
        self.logger.debug("Columns in B but not in A: {0}".format(columnsNotInA))

        for column in columnsNotInA:
            dfA[column] = 0

        return (dfA, dfB)

    def prune(self, df, keep=None, remove=None):
        if keep:
            self.logger.info("Pruning, keeping fields: {0}".format(keep))
            df = df[keep]

        if remove:
            self.logger.info("Pruning, removing fields: {0}".format(remove))
            df = df.drop(remove, axis=1)

        return df

    def encodeList(self, columns, dfA, dfB):

        if columns:
            for column in columns:
                dfA, dfB = self.encode(column, dfA, dfB)

        return (dfA, dfB)

    def encode(self, column, dfA, dfB):

        # Note: all input dataframes must be encoded together. If each
        # dataframe were encoded independently, the labels would be derived
        # from the values present in that dataframe alone, and the resulting
        # mappings would diverge whenever the two dataframes contain
        # different value sets for the same column. Fitting a single encoder
        # on the combined values guarantees one consistent mapping.
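        #
        # Concrete illustration (hypothetical values; LabelEncoder assigns
        # codes in sorted order of the classes):
        #   fit on dfA alone, values {'N', 'Y'}:       N=0, Y=1
        #   fit on dfB alone, values {'A', 'N', 'Y'}:  A=0, N=1, Y=2
        # Here 'Y' would map to 1 in dfA but 2 in dfB; fitting one encoder
        # on the combined values gives both dataframes the same mapping.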

        lbl = preprocessing.LabelEncoder()

        self.logger.debug("Encoding field: {0}".format(column))

        valuesA = list(dfA[column].values)
        valuesB = list(dfB[column].values)
        values = valuesA + valuesB
        lbl.fit(values)

        # self.logger.debug('Encoding field: {0}, classes: {1}'.format(column, lbl.classes_))
        # xto = lbl.transform(values)
        # xfrom = lbl.inverse_transform(xto)
        # self.logger.debug('Encoding field {0}, FROM: {1}'.format(column, xfrom))
        # self.logger.debug('Encoding field {0}, TO:   {1}'.format(column, xto))

        dfA[column] = lbl.transform(valuesA)
        dfB[column] = lbl.transform(valuesB)

        return (dfA, dfB)
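
For orientation, here is a minimal usage sketch of the Data class. The file
names and the 'label' column are hypothetical; it assumes the imports above
and a working project-local Logger.

if __name__ == '__main__':
    data = Data()

    # Load a CSV and cache it as a pickle for faster subsequent loads
    df = data.load('train.csv')
    data.saveDataFrame(df, 'train.pkl')

    # Scale the numeric columns to zero mean and unit variance
    numeric = data.normalize(df.select_dtypes(include='number'))

    # Keep the first 80% of rows, then hold out 25% of those for testing
    X_train, X_test, y_train, y_test = data.segment(
        numeric, df['label'], totalPct=0.8, testingPct=0.25, randomState=42)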
Example #2
# Required imports ("Logger", "Data", "Scorer" and "ClassifierList" are
# assumed to be project-local modules importable in this package)
import json
import os
import datetime as dt
import urllib.request
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV

class Runner:

    packageName = 'com.brodagroup.machinelearning.common.Runner'

    logger = None
    rpt = None
    gridsearchrpt = None
    featurerpt = None
    scoringrpt = None
    preprocessor = None

    # features: dataframe used for fit / learning
    features = None

    # test: dataframe used for prediction
    test = None

    # target:  dataframe (single column) of actual/correct values (for scoring)
    target = None

    # expected: dataframe (single column) of actual values (for verification that algo works)
    expected = None
    hasExpected = False

    # y_pred: array of predictions (integer)
    y_pred = None

    # yy_pred: array of prediction probabilities (float)
    yy_pred = None

    # Initializer
    def __init__(self):
        self.logger = Logger(self.packageName).getLogger()
        return

    def dumpConfiguration(self):
        pretty = json.dumps(self.configuration, sort_keys=True, indent=4)
        return(pretty)

    def configure(self, jsonstr=None, file=None, url=None, overrides=None):
        if file:
            self.logger.info('Using configuration file: {0}'.format(file))
            with open(file, encoding='utf-8') as configurationFile:
                configuration = json.loads(configurationFile.read())
        elif url:
            configuration = json.loads(urllib.request.urlopen(url).read())
        elif jsonstr:
            configuration = json.loads(jsonstr)
        else:
            raise RuntimeError('Configuration not provided (json|file|url)')

        self.configuration = configuration
        self.logger.info('Using configuration: {0}'.format(self.dumpConfiguration()))

        self.override(overrides=overrides)

        classifierCode = self.configuration['classifier']
        parameters = self.configuration['parameters']

        classifierList = ClassifierList()
        classifier = classifierList.load(classifierCode, parameters)
        self.classifier = classifier

        return

    def modifyConfiguration(self, dictionary, name, value, depth):
        depth = depth + 1

        parts = name.split('.')
        name = parts[0]

        if isinstance(dictionary[name], dict):
            if depth > 3:
                raise RuntimeError('Error -- too many levels in configuration')

            # Recurse into the nested dictionary itself (not self.configuration,
            # which would always restart from the top level) and keep any
            # remaining dotted segments intact
            xdict = dictionary[name]
            xname = '.'.join(parts[1:])
            self.modifyConfiguration(xdict, xname, value, depth)
        else:
            dictionary[name] = value
            self.logger.info('Setting name: {0} to value: {1}'.format(name, value))

        return(name)
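
    # Example (hypothetical): modifyConfiguration(cfg, 'parameters.max_depth', 7, 0)
    # descends into cfg['parameters'] and sets cfg['parameters']['max_depth'] = 7;
    # this is how the 'name:value' overrides handled below reach nested keys.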

    def override(self, overrides=None):

        if overrides:
            self.logger.info('Overriding parameters: {0}'.format(overrides))
            for nvp in overrides:
                x = nvp.split(':')

                name = x[0]
                value = x[1]
                self.modifyConfiguration(self.configuration, name, value, 0)

            self.logger.info('Using new configuration: {0}'.format(self.dumpConfiguration()))

        return

    def setPreprocessor(self, c):
        # Renamed from 'preprocessor' so the setter no longer collides with
        # the 'preprocessor' attribute it assigns to
        self.logger.info('Setting preprocessor')
        self.preprocessor = c
        return

    def load(self):
        self.logger.info('Loading data')

        data = Data()

        trainCSV = self.configuration['trainCSV']
        testCSV = self.configuration['testCSV']

        featuresPKL = self.configuration['featuresPKL']
        targetPKL = self.configuration['targetPKL']
        testPKL = self.configuration['testPKL']

        expectedCSV = None
        expectedPKL = None
        try:
            expectedCSV = self.configuration['expectedCSV']
            expectedPKL = self.configuration['expectedPKL']
        except KeyError:
            # The expected-values files are optional
            pass

        # If the dataframe (pickled) file exists, then load it
        # Otherwise, load the CSV, preprocess it, and then save it as a
        # PKL file which will reduce load times
        tmpFeatures = None
        tmpTarget = None
        tmpTest = None

        if os.path.exists(featuresPKL):
            self.logger.info('Loading train PKL: {0}'.format(featuresPKL))
            tmpFeatures = data.loadDataFrame(featuresPKL)
            self.logger.info('Loading target PKL: {0}'.format(targetPKL))
            tmpTarget = data.loadDataFrame(targetPKL)
            self.logger.info('Loading test PKL: {0}'.format(testPKL))
            tmpTest = data.loadDataFrame(testPKL)

        else:
            self.logger.info('Loading train CSV: {0}'.format(trainCSV))
            rawtrain = data.load(trainCSV)
            self.logger.info('Loading test CSV: {0}'.format(testCSV))
            rawtest = data.load(testCSV)

            # Preprocess the data
            tmpFeatures, tmpTarget, tmpTest = self.preprocessor.execute(rawtrain, rawtest)

            # Save the dataframe (lower load times)
            self.logger.info('Saving features PKL: {0}'.format(featuresPKL))
            data.saveDataFrame(tmpFeatures, featuresPKL)
            self.logger.info('Saving target PKL: {0}'.format(targetPKL))
            data.saveDataFrame(tmpTarget, targetPKL)
            self.logger.info('Saving test PKL: {0}'.format(testPKL))
            data.saveDataFrame(tmpTest, testPKL)

        if expectedPKL and os.path.exists(expectedPKL):
            self.logger.info('Loading expected PKL: {0}'.format(expectedPKL))
            tmpExpected = data.loadDataFrame(expectedPKL)
            self.hasExpected = True

        elif expectedCSV and os.path.exists(expectedCSV):
            self.logger.info('Loading expected CSV: {0}'.format(expectedCSV))
            tmpExpected = data.load(expectedCSV)
            self.logger.info('Saving expected PKL: {0}'.format(expectedPKL))
            data.saveDataFrame(tmpExpected, expectedPKL)
            self.hasExpected = True

        self.features = tmpFeatures
        self.target = tmpTarget
        self.test = tmpTest

        if self.hasExpected:
            self.expected = tmpExpected

        return

    # Segment the TRAINING set into a smaller
    # cross validation set of data
    def segment(self):

        self.logger.info('Segmenting...')

        data = Data()

        totalpct = float(self.configuration['totalpct'])
        testpct = float(self.configuration['testpct'])
        randomstate = int(self.configuration['randomstate'])
        X_train, X_test, y_train, y_test = data.segment(self.features, self.target, totalpct, testpct, randomstate)

        self.X_train = X_train
        self.X_test = X_test
        self.y_train = y_train
        self.y_test = y_test

        return

    def fit(self):

        npXTrain = np.array(self.X_train).astype(np.float32)
        npyTrain = np.array(self.y_train).astype(np.int32)

        self.classifier.fit(npXTrain, npyTrain)
        return

    def crossvalidate(self):
        npXTrain = np.array(self.X_train).astype(np.float32)
        npyTrain = np.array(self.y_train).astype(np.int32)

        rptDF = self.classifier.crossvalidate(npXTrain, npyTrain)
        self.crossvalidationDF = rptDF
        pd.set_option('display.max_rows', 10000)
        self.logger.info('Cross Validation Report\n{0}'.format(rptDF))
        return

    def gridsearch(self, use=True, score='roc_auc'):

        self.logger.info('Executing grid search...')

        parameters = self.configuration['gridsearch']

        x = GridSearchCV(self.classifier, parameters, cv=6, scoring=score, verbose=10, n_jobs=6)
        #x = GridSearchCV(self.classifier, parameters, cv=5, scoring=score, verbose=10)

        npXTrain = np.array(self.X_train).astype(np.float32)
        npyTrain = np.array(self.y_train).astype(np.int32)
        x.fit(npXTrain, npyTrain)

        rpt = 'Grid Search Analysis \t\t' + str(dt.datetime.now())
        rpt = rpt + '\n\nParameters {0}'.format(parameters)
        rpt = rpt + '\n\nBest parameters set found:'
        rpt = rpt + '\n\t' + '{0}'.format(x.best_estimator_)
        rpt = rpt + '\n\nGrid Search Scores (using {0}):'.format(score)
        rpt = rpt + '\nSCORE\t\tSTDDEV(+/-)\tPARAMETERS:'
        # Note: grid_scores_ was removed in scikit-learn 0.20; cv_results_
        # carries the equivalent information
        results = x.cv_results_
        for params, mean_score, std_score in zip(
                results['params'], results['mean_test_score'], results['std_test_score']):
            rpt = rpt + '\n' + '{0:0.7f}\t{1:0.7f}'.format(mean_score, std_score / 2)
            for key in params:
                value = params[key]
                rpt = rpt + '\t\t{0}\t\t{1}'.format(key, value)

        if use:
            self.classifier = x.best_estimator_

        self.gridsearchrpt = rpt

        return(rpt)
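
    # Example 'gridsearch' configuration entry (hypothetical values): a dict
    # mapping parameter names to lists of candidate values, e.g.
    #   "gridsearch": {"n_estimators": [100, 200], "max_depth": [3, 5, 7]}
    # GridSearchCV then evaluates every combination with 6-fold CV.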

    def importance(self):

        self.logger.info('Creating feature importance report...')

        rpt = None

        rpt = 'Feature Importance \t\t' + str(dt.datetime.now())
        if self.classifier is None:
            return(rpt)

        if hasattr(self.classifier, 'importance'):
            df = self.classifier.importance(self.X_train.columns.values)
            rpt = rpt + '\n\n{0}'.format(df)

        if hasattr(self.classifier, 'feature_importances_'):

            fi = pd.DataFrame(self.classifier.feature_importances_)
            columns = pd.DataFrame(self.X_train.columns.values)
            result = pd.concat([columns, fi], axis=1)
            result.columns = ['Feature', 'Importance']
            ranked = result.sort_values(['Importance', 'Feature'], ascending=[False, True])
            rpt = rpt + '\n{0}'.format(ranked)

        #pd.set_option('display.max_rows', len(ranked))
        #pd.reset_option('display.max_rows')
        self.featurerpt = rpt

        return(rpt)

    def score(self):

        self.logger.info('Scoring...')

        npXTest = np.array(self.X_test).astype(np.float32)

        y_pred = self.classifier.predict(npXTest)
        yy_pred = self.classifier.predict_proba(npXTest)[:,1]

        self.logger.debug('Features shape: {0}, test shape: {1}'.format(
            self.features.shape, self.test.shape))

        reportName = 'Cross Verification Data Report \t\t' + str(dt.datetime.now())

        scorer = Scorer()
        y_test = self.y_test
        rpt = scorer.score(
                y_test,
                y_pred,
                yy_pred,
                classifier=self.classifier,
                title=reportName,
                configuration=self.configuration )

        self.y_pred = y_pred
        self.yy_pred = yy_pred
        self.scoringrpt = rpt

        return(rpt)

    def inspect(self, name):
        x = getattr(self, name)
        return(x)

    def inquire(self, name):
        x = hasattr(self, name)
        return(x)

    def inject(self, name, value):
        setattr(self, name, value)
        return

    def report(self):
        self.logger.info('Executing full report')

        rpt = '\nFull Report\n'

        if self.featurerpt:
            rpt = rpt + '\n\n{0}'.format(self.featurerpt)
        if self.scoringrpt:
            rpt = rpt + '\n\n{0}'.format(self.scoringrpt)
        if self.gridsearchrpt:
            rpt = rpt + '\n\n{0}'.format(self.gridsearchrpt)

        self.rpt = rpt

        return(rpt)

    def predict(self):
        self.logger.info('Predicting...')

        submissionSample = self.configuration['submissionSample']
        submissionDir = self.configuration['submissionDir']

        timestamp = dt.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')

        submissionVFile = submissionDir + '/' + 'submission-values-' + timestamp + '.csv'
        submissionPFile = submissionDir + '/' + 'submission-probabilities-' + timestamp + '.csv'
        submissionLog = submissionDir + '/' + 'submission-' + timestamp + '.txt'

        npTest = np.array(self.test).astype(np.float32)

        y_pred = self.classifier.predict(npTest)
        yy_pred = self.classifier.predict_proba(npTest)[:,1]

        predictionrpt = None
        if self.hasExpected:

            self.logger.debug('Target is available... Scoring target')
            # The second column contains the actual values
            y_test = self.expected.iloc[:,1]

            scorer = Scorer()
            reportName = '\nTarget Data Prediction Report \t\t' + timestamp
            predictionrpt = scorer.score(
                    y_test,
                    y_pred,
                    yy_pred,
                    classifier=self.classifier,
                    title=reportName,
                    configuration=self.configuration )
            print(predictionrpt)

        sample = pd.read_csv(submissionSample)
        sample.QuoteConversion_Flag = y_pred
        sample.to_csv(submissionVFile, index=False)

        probabilities = pd.read_csv(submissionSample)
        probabilities.QuoteConversion_Flag = yy_pred
        probabilities.to_csv(submissionPFile, index=False)

        mfeatures, nfeatures = self.features.shape
        mtest, ntest = self.test.shape
        mxtrain, nxtrain = self.X_train.shape
        mxtest, nxtest = self.X_test.shape

        self.logger.debug('Saving submission information')
        with open(submissionLog, 'a') as f:
            f.write('Submission Report \t\t\t Generated at: {0}'.format(timestamp))
            f.write('\n\nData Statistics:')
            f.write('\n\tFeature data: \trows: {0}, columns: {1}'.format(mfeatures, nfeatures))
            f.write('\n\tTest data: \t\trows: {0}, columns: {1}'.format(mtest, ntest))
            f.write('\n\nCross Validation Statistics:')
            f.write('\n\tTraining data: \trows: {0}, columns: {1}'.format(mxtrain, nxtrain))
            f.write('\n\tTest data: \t\trows: {0}, columns: {1}'.format(mxtest, nxtest))
            f.write('\n\nValues file:\t\t{0}'.format(submissionVFile))
            f.write('\nProbabilities file:\t{0}'.format(submissionPFile))
            f.write('\n')
            f.write('{0}'.format(self.report()))

            if predictionrpt:
                f.write('\n\n{0}'.format(predictionrpt))

        return(submissionLog, submissionVFile, submissionPFile, self.classifier)
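
Finally, a minimal sketch of the Runner life cycle. The configuration file
name and MyPreprocessor are hypothetical; a preprocessor only needs an
execute(rawtrain, rawtest) method returning the features, target and test
dataframes:

if __name__ == '__main__':
    runner = Runner()

    # Load configuration; overrides use 'name:value', with dots for nesting
    runner.configure(file='config.json',
                     overrides=['parameters.n_estimators:100'])

    runner.setPreprocessor(MyPreprocessor())  # MyPreprocessor is hypothetical

    runner.load()      # CSV -> preprocess -> cached PKL dataframes
    runner.segment()   # split features/target for cross-validation
    runner.fit()
    print(runner.score())
    submissionLog, valuesFile, probabilitiesFile, classifier = runner.predict()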