Exemplo n.º 1
0
def write():
    """Show, for each race leg, the row(s) with the smallest numeric time.

    The data returned by ``funcs.get_data`` gets its missing split values
    replaced with zero, is passed through ``funcs.removeNotFinished`` and
    ``funcs.convertTimes``, and gains a ``Country Name`` column.  One table
    is then rendered per leg from the ``SwimN``/``T1N``/``BikeN``/``T2N``/
    ``RunN`` columns (presumably produced by ``convertTimes`` — confirm).
    """
    # One non-mutating fillna with a per-column mapping.  The previous
    # ``data[col].fillna(..., inplace=True)`` form acts on a temporary column
    # object under pandas Copy-on-Write, so the NaNs would stay in ``data``.
    data = funcs.get_data().fillna({
        'Swim': 0.0,
        'Bike': 0,
        'Run': 0,
        'T1': 0,
        'T2': 0,
    })

    data = funcs.removeNotFinished(data)
    data = funcs.convertTimes(data)
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)

    # (section title, numeric time column), in display order.
    sections = (
        ('Melhor natação', 'SwimN'),
        ('Melhor T1', 'T1N'),
        ('Melhor bike', 'BikeN'),
        ('Melhor T2', 'T2N'),
        ('Melhor corrida', 'RunN'),
    )
    for title, column in sections:
        udisp.title_awesome(title)
        # Keeps every row tied for the minimum, not just the first one.
        best = data[data[column] == data[column].min()]
        st.table(removeAddicionalColumns(best))
Exemplo n.º 2
0
def write():
    """Plot the first ten male and female rows of the selected category.

    The category is picked in the sidebar from ``funcs.getCategories``; rows
    are selected by matching the ``Division`` column against ``M<category>``
    and ``F<category>``.  Each selection is stripped of its division/gender
    columns, re-indexed from zero and its first ten rows are charted.
    """
    data = funcs.get_data()
    data['CountryName'] = data['Country'].apply(funcs.getCountryName)

    categorias = funcs.getCategories(data)
    option = st.sidebar.selectbox("Selecione a categoria", sorted(categorias))

    dropped = ['Division', 'Division Rank', 'Gender', 'Gender Rank']
    sections = (
        ("Top 10 registros masculinos", f'M{option}'),
        ("Top 10 registros femininos", f'F{option}'),
    )
    for title, division in sections:
        # Non-mutating chain: dropping in place on a ``.loc`` selection
        # raises SettingWithCopyWarning, and ``reset_index`` returns a new
        # frame, so its result has to be kept for the re-index to apply.
        atletas = (
            data.loc[data['Division'] == division]
            .drop(columns=dropped)
            .reset_index(drop=True)
        )

        udisp.title_awesome(title)
        st.altair_chart(
            funcs.createStackPlot(funcs.getDataAndConvert(atletas.head(10))))
Exemplo n.º 3
0
 def handle(self, *args, **options):
     """Read the signals of ``options['file_name']`` and store them.

     ``get_data`` must return exactly seven values (time plus six
     channels); they are saved in one ``Signal`` row named after the file,
     keyed by channel label.
     """
     source_name = options['file_name']
     (timeline, ch_1_1, ch_1_3, ch_2_1, ch_2_2, ch_3_1,
      ch_3_3) = get_data(source_name)

     payload = dict(
         time=timeline,
         sig_1_1=ch_1_1,
         sig_1_3=ch_1_3,
         sig_2_1=ch_2_1,
         sig_2_2=ch_2_2,
         sig_3_1=ch_3_1,
         sig_3_3=ch_3_3,
     )
     Signal.objects.create(name=source_name, data=payload)
Exemplo n.º 4
0
def write():
    """Show one athlete's result row and the share of time spent per leg.

    The athlete is chosen by name in the sidebar.  The matching row(s) are
    rendered as a table; unless the athlete's ``Overall Rank`` is ``'DNF'``
    a horizontal bar chart with each leg's fraction of the summed time
    follows.
    """
    data = funcs.get_data()
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)

    names = sorted(data['Name'].unique())
    option = st.sidebar.selectbox('Buscar atleta pelo nome:', names)

    atleta = data.loc[data['Name'] == option].drop(['Country'], axis=1)

    # Swap the index for an empty-string column before rendering
    # (presumably to hide the row numbers in the table).
    st.table(atleta.assign(hack='').set_index('hack'))

    atleta = funcs.convertTimes(atleta)

    legs = ['Swim', 'T1', 'Bike', 'T2', 'Run']
    source = pd.DataFrame({
        'Atividade': legs,
        # '<leg>N' columns hold the numeric value, '<leg>' the display text.
        'TimeN': [getValueUniq(atleta, f'{leg}N') for leg in legs],
        'Tempo': [getValueUniq(atleta, leg) for leg in legs],
        'Ordem': list(range(1, len(legs) + 1)),
    })

    finished = getValueUniq(atleta, 'Overall Rank') != 'DNF'
    if not finished:
        return

    shares = (
        alt.Chart(source)
        .transform_joinaggregate(TotalTime='sum(TimeN)')
        .transform_calculate(PercentOfTotal="datum.TimeN / datum.TotalTime")
        .mark_bar()
    )
    x_share = alt.X('PercentOfTotal:Q',
                    axis=alt.Axis(format='.0%'),
                    title='Porcentagem do total')
    # Legs are listed in race order rather than alphabetically.
    y_leg = alt.Y('Atividade:N',
                  sort=alt.EncodingSortField(field="Ordem",
                                             order='ascending'))
    chart = shares.encode(x=x_share, y=y_leg, tooltip=['Tempo'])
    st.altair_chart(chart.properties(height=450, width=700))
Exemplo n.º 5
0
def write():
    """Call ``createPlot`` once per race leg, each with its own colour.

    The data is loaded with ``funcs.get_data``, passed through
    ``funcs.convertTimes`` and ``funcs.removeNotFinished``, and extended
    with a ``Country Name`` column before plotting.
    """
    data = funcs.removeNotFinished(funcs.convertTimes(funcs.get_data()))
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)

    leg_colours = (
        ('Swim', 'green'),
        ('T1', 'red'),
        ('Bike', 'yellow'),
        ('T2', 'black'),
        ('Run', 'pink'),
    )
    for leg, colour in leg_colours:
        createPlot(data, leg, colour)
Exemplo n.º 6
0
def write():
    """Compare two athletes: mirrored bar charts plus one table each.

    Both athletes are picked by name in the sidebar.  The chart puts the
    first athlete's bars on the left (x axis reversed), the leg labels in
    the middle and the second athlete's bars on the right.  Below it, two
    tables list each athlete's split and overall times.
    """
    data = funcs.convertTimes(funcs.get_data())
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)
    data = funcs.removeNotFinished(data)

    #
    #  Chart
    #
    names = sorted(data['Name'].unique())

    option1 = st.sidebar.selectbox('Buscar atleta pelo nome:', names)
    atleta1 = data.loc[data['Name'] == option1].drop(['Country'], axis=1)

    option2 = st.sidebar.selectbox('Segundo atleta:', names)
    atleta2 = data.loc[data['Name'] == option2].drop(['Country'], axis=1)

    legs = ('Swim', 'T1', 'Bike', 'T2', 'Run')
    sides = (('1', atleta1), ('2', atleta2))
    # One row per (leg, athlete); 'val' tags which side of the chart the row
    # belongs to and 'Order' keeps the legs in race order.
    rows = [
        {
            'val': side,
            'Order': position,
            'name': funcs.getValueUniq(atleta, 'Name'),
            'Tempo': funcs.getValueUniq(atleta, leg),
            'TempoN': funcs.getValueUniq(atleta, f'{leg}N'),
            'Atividade': leg,
        }
        for position, leg in enumerate(legs, start=1)
        for side, atleta in sides
    ]
    df = pd.DataFrame(rows)

    base = alt.Chart(df).properties(width=400)
    color_scale = alt.Scale(domain=[option1, option2],
                            range=['#1f77b4', '#1f77b4'])

    def leg_axis():
        # Shared, axis-less y encoding so the three panels line up.
        return alt.Y('Atividade:N',
                     axis=None,
                     sort=alt.EncodingSortField(field="Order",
                                                order='ascending'))

    def leg_order():
        return alt.Order('Order', sort='ascending')

    def athlete_colour():
        return alt.Color('name:N', scale=color_scale, legend=None)

    left_encoded = base.transform_filter(alt.datum.val == '1').encode(
        y=leg_axis(),
        # Descending x makes the left bars grow towards the left.
        x=alt.X('sum(TempoN):Q',
                title='Tempo',
                sort=alt.SortOrder('descending')),
        color=athlete_colour(),
        tooltip=['Tempo:N'],
        order=leg_order(),
    )
    left = left_encoded.mark_bar().properties(title=option1)

    middle_encoded = base.encode(
        y=leg_axis(),
        text=alt.Text('Atividade:N'),
        order=leg_order(),
    )
    middle = middle_encoded.mark_text().properties(width=40)

    right_encoded = base.transform_filter(alt.datum.val == '2').encode(
        y=leg_axis(),
        x=alt.X('sum(TempoN):Q', title='Tempo'),
        color=athlete_colour(),
        tooltip=['Tempo:N'],
    )
    right = right_encoded.mark_bar().properties(title=option2)

    st.altair_chart(alt.concat(left, middle, right, spacing=5))

    #
    #  Table
    #
    table_rows = (
        ('Swim', 'Swim'),
        ('T1', 'T1'),
        ('Bike', 'Bike'),
        ('T2', 'T2'),
        ('Run', 'Run'),
        ('Total', 'Overall'),
    )

    def summary(atleta):
        # (label shown, source column) pairs turned into a two-column frame.
        return pd.DataFrame([
            {'Atividade': label, 'Tempo': funcs.getValueUniq(atleta, column)}
            for label, column in table_rows
        ])

    df1 = summary(atleta1)
    df2 = summary(atleta2)

    with Grid("1 1", color="#000000", background_color="#FFFFFF") as grid:
        grid.cell("a", 1, 2, 1, 2).dataframe(df1.set_index('Atividade'))
        grid.cell("b", 2, 3, 1, 2).dataframe(df2.set_index('Atividade'))
Exemplo n.º 7
0
def write():
    """Render a finish/DNS/DNF summary table, then one block per status.

    The table holds the number of rows, the count of non-null ``Swim``,
    ``Bike`` and ``Run`` values, and the percentage of rows whose split is
    missing while ``Overall Rank`` is ``'DNS'``/``'DNF'``.  Afterwards
    ``prepareBlock`` is called for ``'DQ'``, ``'DNS'`` and ``'DNF'`` on the
    data sorted by ``Division``.
    """
    data = funcs.convertTimes(funcs.get_data())

    #
    #  Table
    #
    total = data.shape[0]
    rank = data['Overall Rank']
    dns_or_dnf = rank.isin(['DNS', 'DNF'])

    def share(mask):
        # Every percentage goes through the same ``funcs`` formatter; the
        # mean of a boolean mask is the fraction of rows that match.
        return funcs.showPercent(100 * mask.mean())

    def finished(column):
        # Rows that have a value recorded for this leg.
        return total - int(data[column].isnull().sum())

    df = pd.DataFrame({
        'Athletes': [total],
        'Swim Finish': [finished('Swim')],
        'Swim DNS/DNF': [share(data['Swim'].isnull() & dns_or_dnf)],
        'Bike Finish': [finished('Bike')],
        'Bike DNF': [share(data['Bike'].isnull() & dns_or_dnf)],
        'Run Finish': [finished('Run')],
        # Unlike the two legs above, only 'DNF' counts here.
        'Run DNF': [share(data['Run'].isnull() & rank.eq('DNF'))],
        'Overall DNS/DNF': [share(dns_or_dnf)],
    })

    # Swap the index for an empty-string column before rendering
    # (presumably to hide the row number in the table).
    df = df.assign(hack='').set_index('hack')

    st.table(df)

    #
    #  Charts
    #
    data = data.drop(columns=[
        'Overall', 'Run', 'Bike', 'Swim', 'T1', 'T2', 'Division Rank',
        'Gender Rank'
    ]).sort_values('Division')
    # Constant column of ones, added before the rows go to prepareBlock.
    data['Atletas'] = 1

    for status in ('DQ', 'DNS', 'DNF'):
        prepareBlock(data, status)
Exemplo n.º 8
0
def main():
    """Train a LightGBM regressor, or evaluate a saved one if ``args.test``.

    Configuration is read from the module-level ``args``.  With
    ``args.use_original`` the CSV is split 60/20/20 by position (shuffled
    first when ``args.iid``) and features come from
    ``F.get_original_data``; otherwise data loaders are built and features
    come from ``F.get_data`` using the encoder of a saved autoencoder
    checkpoint.

    In test mode the pickled model is loaded and its MAE/MAPE on the test
    split are printed next to a baseline that always predicts the mean of
    the training labels.  In training mode a model is fitted with early
    stopping on the dev split, scored on it, and pickled to
    ``<model_path>/lightgbm.pkl``.
    """
    if args.use_original:
        df = pd.read_csv(args.csv_path)

        if args.iid:
            # Shuffle all rows before splitting.
            df = df.sample(frac=1)

        # Positional 60/20/20 split.  ``iloc`` slicing replaces
        # ``np.split(df, ...)``, which goes through ``DataFrame.swapaxes``
        # (deprecated in pandas 2.1).
        trn_end = int(.6 * len(df))
        dev_end = int(.8 * len(df))
        trn = df.iloc[:trn_end]
        dev = df.iloc[trn_end:dev_end]
        tst = df.iloc[dev_end:]
        model_path = f'./ae_regressor/best_model/{args.label_type}/norm_{args.norm_type}/{args.data_type}/img_flatten/original'

    else:
        trn_loader, dev_loader, tst_loader = get_dataloader(
            csv_path=args.csv_path,
            batch_size=args.batch_size,
            label_type=args.label_type,
            iid=args.iid,
            transform=args.norm_type,
            img_size=args.img_size
        )

        # Create the autoencoder and restore its weights; only the encoder
        # part is used below to produce features.
        model_path = f'./ae_regressor/best_model/{args.label_type}/norm_{args.norm_type}/{args.data_type}/CAE'
        autoencoder = F.create_model(args)
        checkpoint = torch.load(os.path.join(model_path, 'autoencoder.pkl'))
        autoencoder.module.load_state_dict(checkpoint['model'])
        encoder = autoencoder.module.encoder

    os.makedirs(model_path, exist_ok=True)
    print(f"** Training progress with {args.data_type} condition **")

    if args.test:

        load_path = os.path.join(model_path, 'lightgbm.pkl')
        # NOTE: pickle.load can execute arbitrary code; only point this at
        # model files written by the training branch of this script.
        with open(load_path, 'rb') as f:
            best_model = pickle.load(f)

        if args.use_original:
            x_tst, y_tst = F.get_original_data(args, tst, 'tst')
            _, y_trn = F.get_original_data(args, trn, 'trn')
        else:
            x_tst, y_tst = F.get_data(args, tst_loader, encoder, 'tst')
            _, y_trn = F.get_data(args, trn_loader, encoder, 'trn')

        y_pred = best_model.predict(x_tst)
        mae = mean_absolute_error(y_tst, y_pred)
        mape = mean_absolute_percentage_error(y_true=y_tst, y_pred=y_pred)
        print(f"[Test] MAE:{mae}, MAPE:{mape}")

        # Baseline: predict the training-label mean for every test sample.
        mean_value = np.full_like(y_tst, np.mean(y_trn))
        mae = mean_absolute_error(y_tst, mean_value)
        mape = mean_absolute_percentage_error(y_true=y_tst, y_pred=mean_value)
        print(f"[Mean Value] MAE:{mae}, MAPE:{mape}")

    else:
        # Extract the training and dev data.
        if args.use_original:
            x_train, y_train = F.get_original_data(args, trn, 'trn')
            x_dev, y_dev = F.get_original_data(args, dev, 'dev')
        else:
            x_train, y_train = F.get_data(args, trn_loader, encoder, 'trn')
            x_dev, y_dev = F.get_data(args, dev_loader, encoder, 'dev')
        print(f'Data volumn for grid search: {len(y_train)}')

        d_train = lgb.Dataset(data=x_train, label=y_train)
        d_dev = lgb.Dataset(data=x_dev, label=y_dev)

        params = {
            'learning_rate': 0.1,
            'boosting_type': 'gbdt',
            'objective': 'regression_l1',
            'metric': 'mae',
            'num_leaves': 32,  # default: 31
            'min_data': 20,  # number of data in a leaf: overfitting, default: 20
            'device': 'cpu',
            'bagging_fraction': 0.3,
            'bagging_freq': 10,
            'lambda_l1': 0.7,
        }

        # NOTE(review): ``verbose_eval`` and ``early_stopping_rounds`` were
        # removed from ``lgb.train`` in LightGBM 4.0 in favour of callbacks;
        # confirm the pinned LightGBM version before upgrading.
        model = lgb.train(
            params=params,
            train_set=d_train,
            num_boost_round=2000,
            valid_sets=d_dev,
            verbose_eval=100,
            early_stopping_rounds=100
            )

        predict_dev = model.predict(x_dev)
        mae = mean_absolute_error(y_dev, predict_dev)
        mape = mean_absolute_percentage_error(y_dev, predict_dev)

        print(f"MAE: {mae}, MAPE: {mape}")

        with open(os.path.join(model_path, 'lightgbm.pkl'), 'wb') as f:
            pickle.dump(model, f)
Exemplo n.º 9
0
import pandas as pd
import numpy as np
import altair as alt
import streamlit as st
import utils.display as udisp
import utils.functions as funcs

# Module-level frame read by calculeMedianFromCat below.
data = funcs.get_data()
data = funcs.convertTimes(data)
data = funcs.removeNotFinished(data)
data['Country Name'] = data['Country'].apply(funcs.getCountryName)
# ``astype`` returns a new frame instead of converting in place, so the
# result has to be rebound for the integer cast of ``BikeN`` to take effect.
data = data.astype({'BikeN': int})


def calculeMedianFromCat(cat, n):
    """Build the median split values of the first ``n`` rows of a division.

    Args:
        cat: Value matched against the ``Division`` column of the
            module-level ``data`` frame; also stored under ``'Name'``.
        n: Number of leading rows of that division to keep (``head(n)``).

    NOTE(review): as visible here the function fills ``aux`` but neither
    formats a ``'Run'`` entry nor returns anything — the definition looks
    truncated; confirm against the full file.
    """

    df = data[data['Division'] == cat].head(n)

    # Median of each numeric split column, truncated to a whole number by
    # int() (presumably seconds, given the secondsToTime calls below).
    aux = {
        'Name': cat,
        'SwimN': int(df['SwimN'].median()),
        'T1N': int(df['T1N'].median()),
        'BikeN': int(df['BikeN'].median()),
        'T2N': int(df['T2N'].median()),
        'RunN': int(df['RunN'].median()),
    }

    # Formatted counterparts of the medians above.
    aux['Swim'] = funcs.secondsToTime(aux['SwimN'])
    aux['T1'] = funcs.secondsToTime(aux['T1N'])
    aux['Bike'] = funcs.secondsToTime(aux['BikeN'])
    aux['T2'] = funcs.secondsToTime(aux['T2N'])
Exemplo n.º 10
0
def write():
    """Render the per-country statistics page.

    Sections, in order:

    1. bar chart with the number of athletes per country;
    2. table of the ten countries with the most athletes;
    3. table of the countries with the most rows ranked ``'1'`` in their
       division;
    4. chart of the first N rows per country and gender for the category
       chosen in the sidebar;
    5. chart of the rows with ``Division Rank`` <= N per country over all
       categories.

    N and the category come from sidebar select boxes.
    """
    data = funcs.get_data()

    #
    #  First step: number of athletes per country
    #
    data['Country Name'] = data['Country'].apply(funcs.getCountryName)
    # Constant column of ones, so summing it per group counts the rows.
    data['Atletas'] = 1

    # The string "sum" is used instead of np.sum: passing the NumPy callable
    # to ``agg`` raises a FutureWarning on pandas >= 2.1.
    df = (data.groupby(['Country', 'Country Name'])
          .agg({"Atletas": "sum"})
          .reset_index())

    udisp.title_awesome("Quantidade de Atletas por país")

    bars = alt.Chart(df).mark_bar().encode(
        alt.Y('Atletas', type='quantitative', title='Quantidade de Atletas'),
        alt.X('Country Name:N', title='País'),
        tooltip=['Atletas']).properties(height=450)

    text = bars.mark_text(
        align='left',
        baseline='middle',
        dx=2  # Nudges text to right so it doesn't appear on top of the bar
    ).encode(text='Country')

    # Render the layered chart.  It used to be built with a misspelled
    # ``widht`` property and then thrown away, so the country labels never
    # reached the page.
    st.altair_chart((bars + text).properties(width=600))

    #
    #  Countries with the most athletes
    #
    udisp.title_awesome('Top 10 países com mais atletas')

    total = data.shape[0]

    df['Porcentagem'] = df['Atletas'].map(
        lambda count: funcs.showPercent(funcs.calcPercent(total, count)))

    # Swap the index for an empty-string column before rendering
    # (presumably to hide the row numbers in the table).
    df = (df.sort_values('Atletas', ascending=False)
          .head(10)
          .assign(hack='')
          .set_index('hack'))

    st.table(df)

    #
    #  Countries with the most category winners
    #
    udisp.title_awesome('Top países com mais vitórias')

    categorias = funcs.getCategories(data)
    totalCat = len(categorias)

    # 'Division Rank' is still text at this point, hence the '1' string.
    df = (data[data['Division Rank'].eq('1')]
          .groupby(['Country', 'Country Name'])
          .agg({"Atletas": "sum"})
          .rename({'Atletas': 'Vitórias'}, axis='columns'))

    df = (df.sort_values('Vitórias', ascending=False)
          .reset_index()
          .assign(hack='')
          .set_index('hack')
          .head(10))

    df['Porcentagem'] = df['Vitórias'].map(
        lambda wins: funcs.showPercent(funcs.calcPercent(totalCat, wins)))

    st.table(df)

    #
    #  Top-N athletes per country in the selected category
    #
    option = st.sidebar.selectbox("Selecione a categoria", sorted(categorias))

    # ``assign`` returns new frames; writing the column into the ``.loc``
    # selections directly raises SettingWithCopyWarning.
    atletas_m = data.loc[data['Division'] == f'M{option}'].assign(
        Gender='Masculino')
    atletas_f = data.loc[data['Division'] == f'F{option}'].assign(
        Gender='Feminino')

    q = st.sidebar.selectbox("Quantidade de Atletas", [5, 10, 15, 20])

    udisp.title_awesome(f'Top {q} atletas por país na categoria')

    gender_keys = ['Country', 'Country Name', 'Gender']
    division_sum_m = atletas_m.head(int(q)).groupby(gender_keys).agg(
        {"Atletas": "sum"})
    division_sum_f = atletas_f.head(int(q)).groupby(gender_keys).agg(
        {"Atletas": "sum"})

    topAtletas = pd.concat([division_sum_f, division_sum_m])

    source = topAtletas.reset_index().rename(columns={
        'Country': 'abrev',
        'Country Name': 'name',
        'Gender': 'Sexo',
    })

    c = alt.Chart(source).mark_bar().encode(
        x=alt.X('Sexo:N', axis=alt.Axis(title=None)),
        y=alt.Y('Atletas:Q', axis=alt.Axis(offset=1)),
        color='Sexo:N',
        column=alt.Column('name:N',
                          title='País',
                          header=alt.Header(labelAngle=270,
                                            labelAlign='right')))

    st.altair_chart(c)

    #
    #  Countries with the most athletes within the top N of every category
    #
    udisp.title_awesome(f'Top {q} atletas por país em todas categorias')

    data = funcs.removeNotFinished(data)
    data = data.astype({"Division Rank": int})
    data = data[data['Division Rank'] <= q]

    by_division = data.groupby(['Country', 'Country Name',
                                'Division']).agg({"Atletas": "sum"})

    source = by_division.reset_index().rename(columns={
        'Country': 'abrev',
        'Country Name': 'name',
        'Division': 'Categoria',
    })

    color_scale = alt.Scale(
        # Each category once, in first-appearance order; the raw index level
        # repeats a category for every country it occurs in.
        domain=list(dict.fromkeys(source['Categoria'])),
        range=["#c30d24", "#f3a583", "#cccccc", "#94c6da", "#1770ab"])

    y_axis = alt.Axis(
        title=None,
        offset=1,
        ticks=False,
        minExtent=60,
        domain=False)

    c = alt.Chart(source).mark_bar().encode(
        x='Atletas:Q',
        y=alt.Y('name:N', axis=y_axis),
        tooltip=['Atletas', 'Categoria'],
        color=alt.Color(
            'Categoria:N',
            legend=alt.Legend(title='Categoria'),
            scale=color_scale,
        ))

    st.altair_chart(c.properties(width=900))