Пример #1
0
def get_survival_measure_data(data):
    """Pivot the survival result columns and split ``process`` into fields.

    Selects the ``survival_alive`` and ``survival_other`` result columns
    together with ``GROUPBY_COLUMNS``, passes them through ``pivot_data``
    and then peels the ``process`` column apart: one column per entry of
    ``RISKS`` (handled in reverse order), followed by ``measure``,
    ``period`` and ``treatment_line``. The ``process`` column is dropped
    before the frame is handed to ``sort_data``.

    NOTE(review): assumes ``pivot_data`` returns a frame with a
    string-valued ``process`` column -- confirm against its definition.
    """
    data = pivot_data(
        data[results.RESULT_COLUMNS('survival_alive') +
             results.RESULT_COLUMNS('survival_other') + GROUPBY_COLUMNS])

    # The split pieces are indexed explicitly rather than tuple-unpacking
    # the ``.str`` accessor: iterating that accessor raises TypeError on
    # pandas >= 2.0.
    for s in reversed(RISKS):
        pieces = data.process.str.split('_' + s + '_')
        data['process'] = pieces.str[0]
        data[s] = pieces.str[1]

    pieces = data.process.str.split('_period_')
    data['measure'] = pieces.str[0]
    data['process'] = pieces.str[1]

    pieces = data.process.str.split('_line_')
    data['period'] = pieces.str[0]
    data['treatment_line'] = pieces.str[1]

    data = data.drop(columns='process')
    return sort_data(data)
Пример #2
0
def get_measure_data(data,
                     measure,
                     stratified_by_treatment=False,
                     stratified_by_risks=False):
    """Pivot, split and sort the result columns belonging to ``measure``.

    The columns returned by ``results.RESULT_COLUMNS(measure)`` plus
    ``GROUPBY_COLUMNS`` are selected from ``data`` and run through
    ``pivot_data``, ``split_processing_column`` and ``sort_data`` in that
    order. The two ``stratified_by_*`` flags are forwarded unchanged to
    ``split_processing_column``.
    """
    selected_columns = results.RESULT_COLUMNS(measure) + GROUPBY_COLUMNS
    pivoted = pivot_data(data[selected_columns])
    split = split_processing_column(pivoted, stratified_by_treatment,
                                    stratified_by_risks)
    return sort_data(split)
Пример #3
0
def get_treatment_count_measure_data(data, measure):
    """Pivot the ``measure`` result columns and split ``process`` into fields.

    Selects the columns returned by ``results.RESULT_COLUMNS(measure)``
    plus ``GROUPBY_COLUMNS``, passes them through ``pivot_data`` and then
    peels ``year``, ``treatment`` and ``treatment_line`` off the end of the
    ``process`` column, in that order. The remaining ``process`` column is
    dropped before the frame is handed to ``sort_data``.

    NOTE(review): assumes ``pivot_data`` returns a frame with a
    string-valued ``process`` column -- confirm against its definition.
    """
    data = pivot_data(data[results.RESULT_COLUMNS(measure) + GROUPBY_COLUMNS])

    # The split pieces are indexed explicitly rather than tuple-unpacking
    # the ``.str`` accessor: iterating that accessor raises TypeError on
    # pandas >= 2.0.
    pieces = data.process.str.split('_year_')
    data['process'] = pieces.str[0]
    data['year'] = pieces.str[1]

    pieces = data.process.str.split('_treatment_')
    data['process'] = pieces.str[0]
    data['treatment'] = pieces.str[1]

    # NOTE(review): this separator has no leading underscore, unlike the
    # other separators in this function; confirm that is intended.
    pieces = data.process.str.split('line_')
    data['process'] = pieces.str[0]
    data['treatment_line'] = pieces.str[1]

    data = data.drop(columns='process')
    return sort_data(data)
Пример #4
0
def get_registry_measure_data(data):
    """Pivot the ``registry_status`` columns and split ``process`` into fields.

    Selects the ``registry_status`` result columns plus ``GROUPBY_COLUMNS``,
    passes them through ``pivot_data`` and then peels fields off the end of
    the ``process`` column in this order:
    ``renal_function_at_diagnosis``,
    ``race_and_cytogenetic_risk_at_diagnosis``, ``age`` and ``sex``. What
    remains is split on ``'_in_'``: the last piece becomes ``year`` and the
    preceding pieces, re-joined with ``'_in_'``, become ``measure``. The
    ``process`` column is dropped before the frame is handed to
    ``sort_data``.

    NOTE(review): assumes ``pivot_data`` returns a frame with a
    string-valued ``process`` column -- confirm against its definition.
    """
    data = pivot_data(data[results.RESULT_COLUMNS('registry_status') +
                           GROUPBY_COLUMNS])

    # The split pieces are indexed explicitly rather than tuple-unpacking
    # the ``.str`` accessor: iterating that accessor raises TypeError on
    # pandas >= 2.0.
    pieces = data.process.str.split('_renal_function_at_diagnosis_')
    data['process'] = pieces.str[0]
    data['renal_function_at_diagnosis'] = pieces.str[1]

    pieces = data.process.str.split(
        '_race_and_cytogenetic_risk_at_diagnosis_')
    data['process'] = pieces.str[0]
    data['race_and_cytogenetic_risk_at_diagnosis'] = pieces.str[1]

    pieces = data.process.str.split('_in_age_group_')
    data['process'] = pieces.str[0]
    data['age'] = pieces.str[1]

    pieces = data.process.str.split('_among_')
    data['process'] = pieces.str[0]
    data['sex'] = pieces.str[1]

    # The measure name may itself contain '_in_', so only the final piece
    # is taken as the year and the rest is stitched back together.
    pieces = data.process.str.split('_in_')
    data['year'] = pieces.str[-1]
    data['measure'] = pieces.str[:-1].apply('_in_'.join)

    data = data.drop(columns='process')
    return sort_data(data)
Пример #5
0
def aggregate_over_seed(data):
    """Sum the count columns of ``data`` within each ``GROUPBY_COLUMNS`` group.

    Every column of ``data`` that is neither in ``GROUPBY_COLUMNS`` nor
    produced by ``results.RESULT_COLUMNS`` for one of
    ``results.NON_COUNT_TEMPLATES`` is treated as a count column. Those
    columns are grouped by ``GROUPBY_COLUMNS`` and summed, and the group
    keys are turned back into ordinary columns with ``reset_index``.

    NOTE(review): non-count columns are excluded from the result entirely.
    An earlier revision had a (disabled) per-group mean for them; confirm
    that dropping them is the intended behavior.
    """
    # Build the exclusion set once instead of re-concatenating the lists
    # for every column tested.
    excluded = set(GROUPBY_COLUMNS)
    for non_count_template in results.NON_COUNT_TEMPLATES:
        excluded.update(results.RESULT_COLUMNS(non_count_template))

    count_columns = [c for c in data.columns if c not in excluded]
    count_data = data[count_columns +
                      GROUPBY_COLUMNS].groupby(GROUPBY_COLUMNS).sum()
    return count_data.reset_index()
Пример #6
0
def get_population_data(data):
    """Pivot the population columns and expose ``process`` as ``measure``.

    Selects ``results.TOTAL_POPULATION_COLUMN``, the ``population`` result
    columns and ``GROUPBY_COLUMNS`` from ``data``, runs them through
    ``pivot_data``, renames the ``process`` column to ``measure`` and
    returns the output of ``sort_data``.
    """
    selected_columns = ([results.TOTAL_POPULATION_COLUMN] +
                        results.RESULT_COLUMNS('population') +
                        GROUPBY_COLUMNS)
    pivoted = pivot_data(data[selected_columns])
    renamed = pivoted.rename(columns={'process': 'measure'})
    return sort_data(renamed)