Example #1
    def run(self):
        # Preprocess every column whose name ends with '_text'.
        apply_cols = [
            col for col in self.data.columns if col.endswith('_text')
        ]
        self.data[apply_cols] = self.data[apply_cols].applymap(
            lambda x: processText(str(x), **self.options)
        )
        if self.options['spell_correction']:
            # Re-tokenize the processed text to build the spell checker's corpus.
            sentences = self.data[apply_cols].applymap(
                lambda x: str(x).split()
            ).values
            sc = SpellCheck(sentences, CONFIG.getint('VARIABLES', 'TopKSpellCheck'))

            self.data[apply_cols] = self.data[apply_cols].applymap(
                lambda x: sc.correct_spelling(x)
            )
        self.preprocessing_complete.emit(self.data)
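
The pattern above applies one element-wise transform to every column whose name ends in '_text'. A minimal, self-contained sketch of the same idea, with a stand-in clean_text in place of the project's processText (a hypothetical name, for illustration only):

    import pandas as pd

    def clean_text(value):
        # Stand-in for processText: lowercase and collapse whitespace.
        return ' '.join(str(value).lower().split())

    df = pd.DataFrame({'comment_text': ['  Hello  WORLD ', 'FOO\tbar'],
                       'score': [1, 2]})
    text_cols = [c for c in df.columns if c.endswith('_text')]
    # applymap calls the function once per cell of the selected sub-frame.
    df[text_cols] = df[text_cols].applymap(clean_text)
    print(df['comment_text'].tolist())  # ['hello world', 'foo bar']
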
Example #2
    def load_file(self, f_path):
        """
        Load data from a CSV file to the workspace.
        Column 0 is used for the index column.
        chardet attempts to determine encoding if file is not utf-8.
            # Attributes
                f_path(String): The filename selected via open_file
        """
        # FIXME: Reset status bar when new data is loaded.
        try:
            self.graph.clear_graph()
            self.available_column_model.loadData([], include_labels=False)
            self.prediction_data = pd.read_csv(
                f_path,
                encoding='utf-8',
                index_col=CONFIG.getint(
                    'VARIABLES',
                    'IndexColumn'))  #TODO: user defined index column
        except UnicodeDecodeError:
            self.logger.warning("UnicodeDecode error opening file",
                                exc_info=True)
            self.comms.update_statusbar.emit(
                "Attempting to determine file encoding...")
            detector = UniversalDetector()
            try:
                # Let chardet inspect the raw bytes until it reaches a confident guess.
                with open(f_path, 'rb') as raw_file:
                    for line in raw_file:
                        detector.feed(line)
                        if detector.done:
                            break
                detector.close()
                print("chardet determined encoding type to be {}".format(
                    detector.result['encoding']))
                self.prediction_data = pd.read_csv(
                    f_path,
                    encoding=detector.result['encoding'],
                    index_col=CONFIG.getint('VARIABLES', 'IndexColumn'))
            except Exception as e:
                self.logger.error("Error detecing encoding", exc_info=True)
                exceptionWarning("Exception has occured.", exception=e)
        except IOError as ioe:
            self.logger.error("IOError detecting encoding", exc_info=True)
            exceptionWarning("IO Exception occured while opening file.",
                             exception=ioe)
        except Exception as e:
            self.logger.error("Error detecting encoding", exc_info=True)
            exceptionWarning("Error occured opening file.", exception=e)

        try:
            columns = self.prediction_data.columns
            self.available_columns = []
            self.columns_with_truth = []

            self.ground_truth_columns = self.prediction_data.columns[
                ~self.prediction_data.isna().any()].tolist()

            for column in columns:
                if column.lower().endswith("text"):
                    self.available_columns.append(column)
                    column_tag = column.split('__')[0]
                    if column_tag + '__actual' in self.ground_truth_columns:
                        self.columns_with_truth.append(column)

            if self.available_columns:
                self.available_column_model.loadData(self.available_columns,
                                                     include_labels=False)

            if self.columns_with_truth:
                self.available_column_model.setTruthData(
                    self.columns_with_truth)
                # self.full_text_count.setText(str(self.prediction_data.shape[0]))
                # self.display_selected_row(None)
                # self.select_all_btn.setEnabled(True)
                # self.deselect_all_btn.setEnabled(True)

            self.comms.update_statusbar.emit("CSV loaded.")
            # else:
            #     exceptionWarning("No allowable data discovered in file.")
        except pd.errors.EmptyDataError as ede:
            exceptionWarning('Empty Data Error.\n', exception=ede)
        except Exception as e:
            self.logger.error("Error loading dataframe", exc_info=True)
            exceptionWarning("Exception occured.  PredictWidget.load_file.",
                             exception=e)
Example #3
    def train_stacker(self, x, y, col_path):
        def get_ratio(row):
            """
            Returns the ratio of agreement between column values (here, predictors) in a given row.
            """
            try:
                pred_value = row.iloc[-1]
                total_same = 0.0
                col_count = float(len(row.iloc[:-1]))
                for data in row.iloc[:-1]:
                    if data == pred_value:
                        total_same += 1.0
                return total_same / col_count
            except ZeroDivisionError:
                return 0
            except Exception as e:
                self.logger.error("ModelTrainer.get_ratio", exc_info=True)
                exceptionWarning(
                    'Exception occurred in ModelTrainer.get_ratio.', repr(e))

        def get_bamboozled_score(row):
            """
            Returns the difference between the number of models and the number of models who predicted incorrectly.
            The higher this value, the more bamboozling the sample
            """
            try:
                pred_value = row.iloc[-1]
                total_wrong = 0
                col_count = len(row.iloc[:-1])
                for data in row.iloc[:-1]:
                    if data != pred_value:
                        total_wrong += 1
                return col_count - total_wrong
            except Exception as e:
                self.logger.error("ModelTrainer.get_bamboozled_score",
                                  exc_info=True)
                exceptionWarning(
                    'Exception occurred in ModelTrainer.get_bamboozled_score.',
                    repr(e))

        stacker_full_class = CONFIG.get(
            'VARIABLES', 'StackingAlgorithmCLassName').split('.')

        final_preds = np.empty(y.shape)
        # Resolve the stacker class from its fully-qualified name in the config.
        stacker_module = '.'.join(stacker_full_class[0:-1])
        inst_module = importlib.import_module(stacker_module)
        stacker_class = getattr(inst_module, stacker_full_class[-1])
        stacker = stacker_class()
        if self.tuning_params['gridsearch']['tune_stacker']:
            self._update_log(
                f'Beginning tuning run on Stacker <b>{".".join(stacker_full_class)}</b>...'
            )
            rscv = RandomizedSearchCV(
                estimator=stacker,
                n_jobs=self.tuning_params['gridsearch']['n_jobs']
                if self.tuning_params['gridsearch']['n_jobs'] != 0 else None,
                cv=self.tuning_params['gridsearch']['cv'],
                n_iter=self.tuning_params['gridsearch']['n_iter'],
                pre_dispatch=CONFIG.get('VARIABLES', 'PreDispatch'),
                verbose=CONFIG.getint('VARIABLES',
                                      'RandomizedSearchVerbosity'),
                scoring=self.tuning_params['gridsearch']['scoring'] if
                len(self.tuning_params['gridsearch']['scoring']) > 0 else None,
                refit='accuracy')
            rscv.fit(x, y)
            best_params = rscv.best_params_
            stacker = stacker_class(**best_params)
            self._update_log('Stacker tuning completed!  Re-evaluating...')

        self._update_log(
            f'Training Stacking algorithm <b>{".".join(stacker_full_class)}</b>'
        )
        # shuffle=True is required for random_state to have an effect.
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=RANDOM_SEED)

        for train, test in skf.split(x, y):
            with joblib.parallel_backend('dask'):
                stacker.fit(x.iloc[train], y[train])
            final_preds[test] = stacker.predict(x.iloc[test])
        # stack_preds = [1 if x > .5 else 0 for x in np.nditer(final_preds)]
        self._update_log('Stacking training complete')
        stack_scores = self.get_model_scores(y, final_preds)

        table_str = '''<table>
                            <thead>
                                <tr>
                                    <th>Accuracy</th><th>F1-Score</th><th>Cohen's Kappa</th>
                                </tr>
                            </thead>
                        <tbody>
                            <tr>
                    '''
        for metric, score in stack_scores.items():
            table_str += '<td style="border: 1px solid #333;">%.2f</td>' % score
        table_str += '</tr></tbody></table><br>'
        self._update_log(table_str, False, True)
        self._update_log('Retraining Stacker on full dataset')
        stacker.fit(x, y)
        save_path = os.path.join(col_path, 'Stacker')
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        save_file = os.path.join(save_path, 'Stacker.pkl')
        self._update_log(f'Saving Stacking algorithm to : {save_file}', False)
        joblib.dump(stacker, save_file, compress=1)
        with open(save_file, 'rb') as model_file:
            self.model_checksums['Stacker'] = hashlib.md5(
                model_file.read()).hexdigest()
        self._update_log(f'Stacking hash: {self.model_checksums["Stacker"]}')

        # Save particulars to file
        col_name = os.path.basename(col_path)  # portable alternative to splitting on '\\'
        stacker_info = {
            'column': col_name,
            'version_directory': self.version_directory,
            'last_train_date': time.strftime('%Y-%m-%d %H:%M:%S',
                                             time.localtime()),
            'train_eval_score': stack_scores,
            'model_checksums': self.model_checksums
        }
        stacker_json_save_file = os.path.join(save_path, 'Stacker.json')
        with open(stacker_json_save_file, 'w') as outfile:
            json.dump(stacker_info, outfile, indent=2)
        x[col_name + TRUTH_LABEL_SUFFIX] = y
        agreement_ratios = x.apply(get_ratio, axis=1)
        bamboozled = x.apply(get_bamboozled_score, axis=1)

        x[col_name + TAG_DELIMITER + 'agreement_ratio'] = agreement_ratios
        x[col_name + TAG_DELIMITER + 'bamboozled_score'] = bamboozled
        pc_len = len(x[x[col_name + TAG_DELIMITER +
                         'agreement_ratio'] <= DISAGREEMENT_THRESHOLD])
        bamboozled_len = len(x[x[col_name + TAG_DELIMITER +
                                 'bamboozled_score'] <= BAMBOOZLED_THRESHOLD])
        self._update_log(
            f"Found {pc_len} samples for {col_name} whose predictor agreement falls at or below {DISAGREEMENT_THRESHOLD}."
        )
        self._update_log(
            f"Found {bamboozled_len} samples for {col_name} that have a bamboozled score of {BAMBOOZLED_THRESHOLD} or below."
        )
        # ? x is a DataFrame: x[col_name + STACKER_LABEL_SUFFIX] = final_preds
        self.all_predictions_df = pd.merge(self.all_predictions_df,
                                           x,
                                           how='outer',
                                           left_index=True,
                                           right_index=True)
        self._update_log('Run complete')
        self._update_log('<hr>', False, True)
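
The StratifiedKFold loop above collects out-of-fold predictions, so every row of final_preds comes from a model that never saw that row during fitting. A minimal sketch of the pattern with synthetic data (LogisticRegression stands in for the configured stacker class):

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold

    X, y = make_classification(n_samples=200, random_state=1337)
    final_preds = np.empty(y.shape)
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1337)
    for train, test in skf.split(X, y):
        clf = LogisticRegression(max_iter=1000).fit(X[train], y[train])
        # Each test fold is predicted by a model fit on the other folds.
        final_preds[test] = clf.predict(X[test])
    # final_preds now holds honest out-of-fold labels, safe for scoring.
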
Example #4
    def grid_search(self,
                    model,
                    x,
                    y,
                    pipeline,
                    tuning_params,
                    n_jobs=-1,
                    n_iter=20,
                    scoring=None,
                    include_tfidf=False,
                    keras_params=None):
        '''Performs a randomized parameter search on the selected pipeline.

            # Arguments

                model: string, name of classifier in pipeline
                x: pandas.DataFrame, training data
                y: numpy.array, training labels
                pipeline: sklearn.pipeline.Pipeline, pipeline object containing feature extractors, feature selectors and estimator
                tuning_params: dict, user-selected tuning options (n_jobs, cv, scoring, etc.)
                n_jobs: int, number of jobs to run in parallel
                n_iter: int, number of parameter settings sampled by the search
                scoring: list, scoring metrics to be used by the evaluator
                include_tfidf: bool, flag to indicate tfidf is included in the pipeline
                keras_params: dict, parameters necessary for model training outside of the regular hyperparams, e.g. input_shape, num_classes, num_features
        '''
        try:
            start_time = time.time()
            filepath = os.path.join(CONFIG.get('PATHS', 'BaseModelDirectory'),
                                    model + '.json')
            with open(filepath, 'r') as f:
                model_data = json.load(f, object_hook=cat_decoder)

            grid_params = {}
            default_params = model_data[model]

            for param_types, types in default_params.items():
                for t, params in types.items():
                    if params['tunable']:
                        param_name = model + '__' + t
                        if params['type'] == 'dropdown':
                            param_options = list(params['options'].values())
                        elif params['type'] == 'double':
                            param_options = scipy.stats.expon(
                                scale=params['step_size'])
                        elif params['type'] == 'int':
                            param_options = scipy.stats.randint(
                                params['min'], params['max'] + 1)
                        elif params['type'] == 'range':
                            param_options = [(1, 1), (1, 2), (1, 3), (1, 4)]
                        else:
                            # Unknown parameter type; skip so param_options
                            # is never referenced unbound.
                            continue
                        grid_params.update({param_name: param_options})

            if include_tfidf:
                with open(CONFIG.get('PATHS', 'BaseTfidfDirectory'), 'r') as f:
                    model_data = json.load(f, object_hook=cat_decoder)
                model_class = model_data['model_class']
                default_params = model_data[model_class]

                for param_types, types in default_params.items():
                    for t, params in types.items():
                        if params['tunable']:
                            param_name = model_class + '__' + t
                            if params['type'] == 'dropdown':
                                param_options = list(
                                    params['options'].values())
                            elif params['type'] == 'double':
                                param_options = scipy.stats.expon(
                                    scale=params['step_size'])
                            elif params['type'] == 'int':
                                param_options = scipy.stats.randint(
                                    params['min'], params['max'] + 1)
                            elif params['type'] == 'range':
                                param_options = [(1, 1), (1, 2), (1, 3),
                                                 (1, 4)]
                            else:
                                # Unknown parameter type; skip it rather than
                                # adding a None distribution to the grid.
                                continue
                            grid_params.update({param_name: param_options})
            # Remnant from __TENSORFLOW work.
            # if keras_params:
            #     updated_key_dict = {f'{model}__{k}':
            #         [v] for k, v in keras_params.items()}
            #     grid_params.update(updated_key_dict)

            self._update_log(f'Beginning RandomizedSearchCV on {model}...')
            rscv = RandomizedSearchCV(
                pipeline,
                grid_params,
                n_jobs=tuning_params['gridsearch']['n_jobs']
                if tuning_params['gridsearch']['n_jobs'] != 0 else None,
                cv=tuning_params['gridsearch']['cv'],
                n_iter=n_iter,
                pre_dispatch=CONFIG.get('VARIABLES', 'PreDispatch'),
                verbose=CONFIG.getint('VARIABLES',
                                      'RandomizedSearchVerbosity'),
                scoring=tuning_params['gridsearch']['scoring']
                if len(tuning_params['gridsearch']['scoring']) > 0 else None,
                refit='accuracy')
            #   refit='accuracy' if len(tuning_params['gridsearch']['scoring']) > 0 else None)  # ! FIXME: Should we allow other, non accuracy metrics here?
            with joblib.parallel_backend('dask'):
                rscv.fit(x, y)
            self.grid_search_time = time.time() - start_time
            self._update_log(
                f'RandomizedSearchCV on {model} completed in {self.grid_search_time:.2f} seconds'
            )
            self._update_log(f'Best score for {model}: {rscv.best_score_}',
                             False)
            return rscv

        except FileNotFoundError:
            self.logger.debug(
                'ModelTrainer.grid_search {} not found'.format(filepath))
        except Exception as e:
            self.logger.error('ModelTrainer.grid_search {}:'.format(model),
                              exc_info=True)
            tb = traceback.format_exc()
            print(tb)
            self._update_log(tb)
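
Both metadata loops above translate a declared parameter type into either a list of candidate values or a scipy distribution that RandomizedSearchCV can sample from. A compact sketch of that mapping with a hypothetical metadata dict (step names and parameters chosen for illustration):

    import scipy.stats
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model import SGDClassifier
    from sklearn.model_selection import RandomizedSearchCV
    from sklearn.pipeline import Pipeline

    # Hypothetical parameter metadata, in the shape the loops consume.
    meta = {
        'SGDClassifier__alpha': {'type': 'double', 'step_size': 1e-4},
        'SGDClassifier__max_iter': {'type': 'int', 'min': 500, 'max': 1500},
        'TfidfVectorizer__ngram_range': {'type': 'range'},
    }
    grid_params = {}
    for name, p in meta.items():
        if p['type'] == 'double':
            # Continuous parameter: sample from an exponential distribution.
            grid_params[name] = scipy.stats.expon(scale=p['step_size'])
        elif p['type'] == 'int':
            # Integer parameter: uniform over [min, max] (randint excludes high).
            grid_params[name] = scipy.stats.randint(p['min'], p['max'] + 1)
        elif p['type'] == 'range':
            grid_params[name] = [(1, 1), (1, 2), (1, 3)]

    pipe = Pipeline([('TfidfVectorizer', TfidfVectorizer()),
                     ('SGDClassifier', SGDClassifier())])
    # Step names must match the prefixes used in grid_params' keys.
    search = RandomizedSearchCV(pipe, grid_params, n_iter=5, cv=3)
    # search.fit(text_samples, labels) would then run the sampled configurations.
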
Example #5
# import package.utils.SequenceTransformer as seq_trans

RANDOM_SEED = 1337
TOP_K = 20000
MAX_SEQUENCE_LENGTH = 1500
BASE_MODEL_DIR = './package/data/base_models'
BASE_TFIDF_DIR = './package/data/feature_extractors/TfidfVectorizer.json'
INPUT_SHAPE = (0, 0)

TAG_DELIMITER = CONFIG.get('VARIABLES', 'TagDelimiter')
PRED_LABEL_SUFFIX = CONFIG.get('VARIABLES', 'PredictedLabelSuffix')
PROB_LABEL_SUFFIX = CONFIG.get('VARIABLES', 'ProbabilityLabelSuffix')
TRUTH_LABEL_SUFFIX = CONFIG.get('VARIABLES', 'TruthLabelSuffix')
STACKER_LABEL_SUFFIX = CONFIG.get('VARIABLES', 'StackerLabelSuffix')
DISAGREEMENT_THRESHOLD = CONFIG.getfloat('VARIABLES', 'DisagreementThreshold')
BAMBOOZLED_THRESHOLD = CONFIG.getint('VARIABLES', 'BamboozledThreshold')


class ModelTrainerSignals(QObject):
    training_complete = pyqtSignal(pd.DataFrame)
    tuning_complete = pyqtSignal(bool, dict)
    update_progressbar = pyqtSignal(int, bool)
    update_training_logger = pyqtSignal(str, bool, bool)


class ModelTrainer(QRunnable):
    '''
    QRunnable tasked with running all model training/tuning.
    This could potentially take days to complete.
    '''
    # Setting parallel_backend to threading allows for multi-threading from a thread.  GUI will not freeze and