Примеры использования ReadLabelsData в Python

Язык программирования: Python

Пространство имен/Пакет: utils

Класс/Тип: ReadLabelsData

Примеров на hotexamples.com: 7

Python ReadLabelsData — найдено 7 примеров. Это лучшие примеры кода на Python для utils.ReadLabelsData, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ReadLabelsData(7)

apply(5)

merge(2)

Основные методы

ReadLabelsData (7)

apply (5)

merge (2)

Пример #1

Показать файл

def gen_time_feature(stats_name, version='v1', kind='train', agg='mean'):
    """
    Build and save one time-based statistic feature per measured value.

    For every row of the label data the matching history is selected with
    ``filter_hist_data`` and summarised by ``compute_time_feature_dict``.
    The per-row dicts are expanded into columns with ``DictVectorizer`` and
    every resulting column is persisted through ``SaveFeature``.

    :param stats_name: statistic name, forwarded to
        ``compute_time_feature_dict`` and embedded in the feature names
    :param version: dataset version, forwarded to the readers and
        ``SaveFeature``
    :param kind: dataset kind (default ``'train'``), forwarded to the readers
        and ``SaveFeature``
    :param agg: aggregation name, forwarded to ``compute_time_feature_dict``
        and embedded in the feature names
    :return: list of the generated feature names
    """
    # 0 values the statistics are computed on
    values = [
        'SBP', 'DBP', 'HEART_RATE_TIMES', 'GLU', 'HEIGHT', 'WEIGHT', 'BMI'
    ]
    # 1 load the history data
    followup = ReadHistData(info='followup_person_info',
                            version=version,
                            kind=kind)
    labels = ReadLabelsData(version, kind)
    # 2 compute the features
    labels['stats_dict'] = labels.apply(
        lambda label: compute_time_feature_dict(
            filter_hist_data(label, followup), values, stats_name, agg),
        axis=1)
    v = DictVectorizer()
    stats_matrix = v.fit_transform(labels['stats_dict'].values).toarray()
    # get_feature_names() was removed in scikit-learn 1.2; prefer its
    # replacement and only fall back on releases that predate it.
    if hasattr(v, 'get_feature_names_out'):
        value_names = v.get_feature_names_out()
    else:
        value_names = v.get_feature_names()
    feature_names = [
        '{}_{}_{}'.format(value_name, stats_name, agg)
        for value_name in value_names
    ]
    # Reuse the label index: pd.concat(axis=1) aligns on index labels, so a
    # fresh RangeIndex would misalign rows (and introduce NaN rows) whenever
    # `labels` does not carry a default 0..n-1 index.
    stats_df = pd.DataFrame(data=stats_matrix,
                            columns=feature_names,
                            index=labels.index)
    labels = pd.concat([labels, stats_df], axis=1)
    # 3 save the features
    for feat in feature_names:
        SaveFeature(labels, feat, version, kind)
    return feature_names

Пример #2

Показать файл

Файл: _0_2_gen_time_base_feature.py Проект: datamininger/cardiovascularriskprediction

def gen_time_base_feature(version, kind):
    """
    Load the label data and the follow-up history for one dataset.

    NOTE(review): the visible body only loads the two objects and then
    returns None implicitly; the listing this snippet comes from appears to
    be cut off, so the remaining feature logic is not shown here.

    :param version: dataset version, forwarded to both readers
    :param kind: dataset kind, forwarded to both readers
    :return: None
    """
    label_data = ReadLabelsData(version=version, kind=kind)
    followup_data = ReadHistData(info='followup_person_info',
                                 version=version,
                                 kind=kind)

Пример #3

Показать файл

Файл: _5_1_gen_action_time_feature.py Проект: datamininger/cardiovascularriskprediction

def gen_action_time_feature(stats_name, version='v2', kind='train'):
    """
    Compute one action-time feature for every label row and save it.

    :param stats_name: statistic name; forwarded to
        ``compute_action_time_feature`` and used as the new column name
    :param version: dataset version, forwarded to the readers and
        ``SaveFeature``
    :param kind: dataset kind, forwarded to the readers and ``SaveFeature``
    :return: ``stats_name``, unchanged
    """
    # 1 load the history data
    history = ReadHistData(info='followup_person_info',
                           version=version,
                           kind=kind)
    label_data = ReadLabelsData(version, kind)

    # 2 compute the feature, one label row at a time
    def feature_for_row(row):
        row_history = filter_hist_data(row, history)
        return compute_action_time_feature(row_history, stats_name)

    label_data[stats_name] = label_data.apply(feature_for_row, axis=1)
    SaveFeature(label_data, stats_name, version, kind)
    return stats_name

Пример #4

Показать файл

Файл: _0_1_gen_base_feature.py Проект: datamininger/cardiovascularriskprediction

def gen_base_feature2(version, kind='train'):
    """
    Compute and save three date-difference features (in days) per label row.

    Saved features: ``first_followup_age``,
    ``first_followup_time_diff_confirm_time`` and
    ``TimePoint_diff_first_followup_time``.

    :param version: dataset version, forwarded to the readers and
        ``SaveFeature``
    :param kind: dataset kind, forwarded to the readers and ``SaveFeature``
    :return: None
    """
    # 1 load the history data
    followup = ReadHistData(info='followup_person_info',
                            version=version,
                            kind=kind)
    labels = ReadLabelsData(version, kind)

    # 2 age at the first follow-up
    birth_dates = followup.groupby('ID')['DATE_OF_BIRTH'].max().reset_index()
    labels = labels.merge(birth_dates, on='ID', how='left')
    earliest_visit = followup.groupby('ID')['FOLLOWUP_DATE'].min()
    first_followup_time_df = earliest_visit.to_frame('first_followup_time').reset_index()
    labels = labels.merge(first_followup_time_df, on='ID', how='left')
    birth_to_first_visit = labels['first_followup_time'] - labels['DATE_OF_BIRTH']
    labels['first_followup_age'] = birth_to_first_visit.dt.days
    SaveFeature(labels, 'first_followup_age', version, kind)

    # 3 time from the hypertension confirmation to the first follow-up
    confirm_dates = followup.groupby('ID')['CONFIRM_DATE'].max().reset_index()
    labels = labels.merge(confirm_dates, on='ID', how='left')
    confirm_to_first_visit = labels['first_followup_time'] - labels['CONFIRM_DATE']
    labels['first_followup_time_diff_confirm_time'] = confirm_to_first_visit.dt.days
    SaveFeature(labels, 'first_followup_time_diff_confirm_time', version, kind)

    # 4 time from the first follow-up to the label's time point
    first_visit_to_time_point = labels['TimePoint'] - labels['first_followup_time']
    labels['TimePoint_diff_first_followup_time'] = first_visit_to_time_point.dt.days
    SaveFeature(labels, 'TimePoint_diff_first_followup_time', version, kind)

Пример #5

Показать файл

Файл: compute_hist_data_type.py Проект: datamininger/cardiovascularriskprediction

def run_hist_data_type(version, kind):
    """
    Classify the history of every label row and pickle one mask per type.

    Each label row gets a ``data_type`` value from ``compute_hist_data_type``
    applied to its filtered history. For each of the type codes ``'1'`` to
    ``'4'`` a boolean mask over the label rows is pickled to
    ``get_path_labels() + '<version>_mask_<type>_<kind>.pkl'``.

    :param version: dataset version, forwarded to the readers and used in
        the output file names
    :param kind: dataset kind, forwarded to the readers and used in the
        output file names
    :return: None
    """
    hist_data = ReadHistData(version=version,
                             info='followup_person_info',
                             kind=kind)
    labels = ReadLabelsData(version=version, kind=kind)
    labels['data_type'] = labels.apply(
        lambda label: compute_hist_data_type(
            filter_hist_data(label=label, followup=hist_data)),
        axis=1)

    # Build the masks from the 'data_type' column computed above. The earlier
    # code compared hist_data['date_type'] instead, which left the computed
    # column unused and read a differently spelled column from the history
    # data rather than from the labels the masks are saved next to.
    # NOTE(review): the comparison uses string codes; confirm that
    # compute_hist_data_type returns strings rather than ints.
    for data_type in ['1', '2', '3', '4']:
        mask = (labels['data_type'] == data_type)
        pd.Series(mask).to_pickle(
            get_path_labels() +
            '{}_mask_{}_{}.pkl'.format(version, data_type, kind))
    return

Пример #6

Показать файл

def gen_missing_ratio(value, version='v1', kind='train'):
    """
    Compute the ``<value>_missing_ratio`` feature per label row and save it.

    :param value: name forwarded to ``compute_missing_ratio``; also the
        prefix of the feature name
    :param version: dataset version, forwarded to the readers and
        ``SaveFeature``
    :param kind: dataset kind, forwarded to the readers and ``SaveFeature``
    :return: None
    """
    # 0 feature name
    feature_name = '{}_missing_ratio'.format(value)

    # 1 load the history data
    history = ReadHistData(info='followup_person_info',
                           version=version,
                           kind=kind)
    label_data = ReadLabelsData(version, kind)

    # 2 compute the feature, one label row at a time
    def ratio_for_row(row):
        row_history = filter_hist_data(row, history)
        return compute_missing_ratio(row_history, value)

    label_data[feature_name] = label_data.apply(ratio_for_row, axis=1)

    # 3 save the feature
    SaveFeature(label_data, feature_name, version, kind)
    return

Пример #7

Показать файл

Файл: _0_1_gen_base_feature.py Проект: datamininger/cardiovascularriskprediction

def gen_base_feature(version, kind='train'):
    """
    Compute and save the sex, age and confirmation-duration features.

    Saved features: ``SEX_CODE``, ``age``, ``confirm_age`` and
    ``time_diff_confirm_2TimePoint``.

    :param version: dataset version, forwarded to the readers and
        ``SaveFeature``
    :param kind: dataset kind, forwarded to the readers and ``SaveFeature``
    :return: None
    """
    # 1 load the history data
    followup = ReadHistData(info='followup_person_info',
                            version=version,
                            kind=kind)
    labels = ReadLabelsData(version, kind)

    # 1 sex
    sex_per_person = followup.groupby('ID')['SEX_CODE'].max().to_frame('SEX_CODE').reset_index()
    labels = labels.merge(sex_per_person, on=['ID'], how='left')
    SaveFeature(labels, 'SEX_CODE', version, kind)

    # 2 age at the time point
    def age_for_row(row):
        return compute_age(filter_hist_data(row, followup), row)

    labels['age'] = labels.apply(age_for_row, axis=1)
    SaveFeature(labels, 'age', version, kind)

    # 3 age when hypertension was confirmed
    def confirm_age_for_person(df_person):
        birth_to_confirm = (df_person['CONFIRM_DATE'].max()
                            - df_person['DATE_OF_BIRTH'].max())
        return birth_to_confirm.days

    confirm_age_per_person = (followup.groupby('ID')
                              .apply(confirm_age_for_person)
                              .to_frame('confirm_age')
                              .reset_index())
    labels = labels.merge(confirm_age_per_person, on=['ID'], how='left')
    SaveFeature(labels, 'confirm_age', version, kind)

    # 4 difference between the time point and the hypertension confirmation
    # NOTE(review): confirm_age is expressed in days; confirm that
    # compute_age returns the same unit before relying on this difference.
    labels['time_diff_confirm_2TimePoint'] = labels['age'] - labels['confirm_age']
    SaveFeature(labels, 'time_diff_confirm_2TimePoint', version, kind)