from collections import Counter
from io import StringIO
from itertools import islice
from timeit import default_timer as timer
import json
import time
import warnings

import numpy as np
import pandas as pd
from autogluon.tabular import TabularDataset


def take(n, iterable):
    """Return the first n items of the iterable as a list.

    Helper assumed for illustration; the original script may import `take`
    from a utility library instead.
    """
    return list(islice(iterable, n))


# NOTE: `preprocess` is defined elsewhere in the original script; it aligns
# the request columns with the training schema before prediction.


# Variant that responds with class probabilities as the CSV payload.
def transform_fn(models, data, input_content_type, output_content_type):
    """
    Transform a request using the Gluon model. Called once per request.

    :param models: The Gluon model and the column info.
    :param data: The request payload.
    :param input_content_type: The request content type ('text/csv').
    :param output_content_type: The (desired) response content type ('text/csv').
    :return: response payload and content type.
    """
    start = timer()
    net = models[0]
    column_dict = models[1]
    label_map = net.class_labels_internal_map  # used by the commented-out thresholding below

    # text/csv
    if "text/csv" in input_content_type:
        # Load dataset; the payload may arrive as str or bytes
        columns = column_dict["columns"]
        if isinstance(data, str):
            df = pd.read_csv(StringIO(data), header=None)
        else:
            df = pd.read_csv(StringIO(data.decode()), header=None)
        df_preprocessed = preprocess(df, columns, net.label)
        ds = TabularDataset(data=df_preprocessed)

        try:
            predictions = net.predict_proba(ds)
            predictions_ = net.predict(ds)
        except Exception:
            try:
                # Retry with NaNs imputed, since some models cannot handle them
                predictions = net.predict_proba(ds.fillna(0.0))
                predictions_ = net.predict(ds.fillna(0.0))
                warnings.warn("Filled NaN's with 0.0 in order to predict.")
            except Exception as e:
                response_body = str(e)
                return response_body, output_content_type

        # threshold = 0.5
        # predictions_label = [
        #     [k for k, v in label_map.items() if v == 1][0] if i > threshold
        #     else [k for k, v in label_map.items() if v == 0][0]
        #     for i in predictions]
        predictions_label = predictions_.tolist()

        # Print prediction counts, limited in case of a regression problem
        pred_counts = Counter(predictions_label)
        n_display_items = 30
        if len(pred_counts) > n_display_items:
            print(f"Top {n_display_items} prediction counts: "
                  f"{dict(take(n_display_items, pred_counts.items()))}")
        else:
            print(f"Prediction counts: {pred_counts}")

        # Form the response from the class probabilities
        output = StringIO()
        pd.DataFrame(predictions).to_csv(output, header=False, index=False)
        response_body = output.getvalue()

        # If the target column was passed, evaluate prediction performance
        target = net.label
        if target in ds:
            print(f"Label column ({target}) found in input data. "
                  "Therefore, evaluating prediction performance...")
            try:
                performance = net.evaluate_predictions(
                    y_true=ds[target],
                    y_pred=np.array(predictions_label),
                    auxiliary_metrics=True)
                print(json.dumps(performance, indent=4,
                                 default=pd.DataFrame.to_json))
                time.sleep(0.1)
            except Exception as e:
                # Print exceptions on evaluate; continue to return predictions
                print(f"Exception: {e}")
    else:
        raise NotImplementedError("content_type must be 'text/csv'")

    elapsed_time = round(timer() - start, 3)
    print(f"Elapsed time: {elapsed_time} seconds")
    return response_body, output_content_type
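# --- Illustrative sketch (not part of the original script) ------------------
# transform_fn expects `models` to be a (predictor, column_dict) pair. Under
# the SageMaker inference convention, that pair would be produced by a
# model_fn(model_dir) hook. The sketch below shows one plausible shape of that
# hook; the "columns.pkl" file name is a hypothetical placeholder, not
# something confirmed by the original source.
def model_fn(model_dir):
    """Load the AutoGluon predictor and the saved column info from model_dir."""
    import os
    import pickle

    from autogluon.tabular import TabularPredictor

    net = TabularPredictor.load(model_dir)  # assumes the predictor was saved under model_dir
    with open(os.path.join(model_dir, "columns.pkl"), "rb") as f:  # hypothetical file name
        column_dict = pickle.load(f)
    return net, column_dict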
# Alternative (simpler) variant of the handler: responds with predicted labels
# rather than class probabilities, and expects the payload as a str.
def transform_fn(models, data, input_content_type, output_content_type):
    """
    Transform a request using the Gluon model. Called once per request.

    :param models: The Gluon model and the column info.
    :param data: The request payload.
    :param input_content_type: The request content type ('text/csv').
    :param output_content_type: The (desired) response content type ('text/csv').
    :return: response payload and content type.
    """
    start = timer()
    net = models[0]
    column_dict = models[1]

    # text/csv
    if input_content_type == 'text/csv':
        # Load dataset
        columns = column_dict['columns']
        df = pd.read_csv(StringIO(data), header=None)
        df_preprocessed = preprocess(df, columns, net.label)
        ds = TabularDataset(data=df_preprocessed)

        try:
            predictions = net.predict(ds)
        except Exception:
            try:
                # Retry with NaNs imputed, since some models cannot handle them
                predictions = net.predict(ds.fillna(0.0))
                warnings.warn("Filled NaN's with 0.0 in order to predict.")
            except Exception as e:
                response_body = str(e)
                return response_body, output_content_type

        # Print prediction counts, limited in case of a regression problem
        pred_counts = Counter(predictions.tolist())
        n_display_items = 30
        if len(pred_counts) > n_display_items:
            print(f'Top {n_display_items} prediction counts: '
                  f'{dict(take(n_display_items, pred_counts.items()))}')
        else:
            print(f'Prediction counts: {pred_counts}')

        # Form the response from the predicted labels
        output = StringIO()
        pd.DataFrame(predictions).to_csv(output, header=False, index=False)
        response_body = output.getvalue()

        # If the target column was passed, evaluate prediction performance
        target = net.label
        if target in ds:
            print(f'Label column ({target}) found in input data. '
                  'Therefore, evaluating prediction performance...')
            try:
                performance = net.evaluate_predictions(y_true=ds[target],
                                                       y_pred=predictions,
                                                       auxiliary_metrics=True)
                print(json.dumps(performance, indent=4,
                                 default=pd.DataFrame.to_json))
                time.sleep(0.1)
            except Exception as e:
                # Print exceptions on evaluate; continue to return predictions
                print(f'Exception: {e}')
    else:
        raise NotImplementedError("content_type must be 'text/csv'")

    elapsed_time = round(timer() - start, 3)
    print(f'Elapsed time: {elapsed_time} seconds')
    return response_body, output_content_type
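# --- Illustrative sketch (not part of the original script) ------------------
# A minimal in-process smoke test of the transform_fn request/response
# contract. The model directory and the headerless CSV rows below are
# hypothetical placeholders.
if __name__ == "__main__":
    models = model_fn("model/")  # hypothetical model_dir
    payload = "5.1,3.5,1.4,0.2\n6.2,3.4,5.4,2.3\n"  # hypothetical feature rows
    body, content_type = transform_fn(models, payload, "text/csv", "text/csv")
    print(f"Response content type: {content_type}")
    print(body)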