Пример #1
0
def plot_histogram(plt_title, df, ctrl):
    """Build a histogram of the positive results recorded for one parameter.

    Parameters
    ----------
    plt_title : str
        Title placed on the chart.
    df : pandas.DataFrame
        Must provide the columns ``PARM`` and ``RESULT``.
    ctrl : dict
        Plot settings. The keys read here are ``ypar`` (parameter to plot),
        ``min_x`` / ``max_x`` (x-axis domain) and ``bin_size`` (bin width).

    Returns
    -------
    list
        ``[chart, data]`` where ``data`` is the filtered frame reduced to
        its ``RESULT`` column, i.e. exactly what the chart displays.
    """
    # presumably get_label maps a parameter key to its axis label; it is
    # defined outside this block -- verify against its definition.
    x_lab = get_label(ctrl['ypar'])

    keep = (df['PARM'] == ctrl['ypar']) & (df['RESULT'] > 0)
    df = df.loc[keep, ['RESULT']]

    # Equal limits mean "no explicit range": let the scale pick its own domain.
    if ctrl['max_x'] == ctrl['min_x']:
        scx = alt.Scale()
    else:
        scx = alt.Scale(domain=(ctrl['min_x'], ctrl['max_x']))

    # Use the bin width only if the user defined one.
    if ctrl['bin_size'] > 0:
        bin_def = alt.Bin(step=ctrl['bin_size'])
    else:
        bin_def = alt.Bin()

    base = alt.Chart(df, title=plt_title).mark_bar().encode(
        alt.X("RESULT:Q", bin=bin_def, title=x_lab, scale=scx),
        y='count()',
    )

    return [base, df]
Пример #2
0
def heatmap(
    data=None,
    x=0,
    y=1,
    color=2,
    opacity=None,
    aggregate="average",
    height=600,
    width=800,
):
    """Generate a heatmap.

    ``data`` is first passed through ``maxbins``, which returns the data to
    plot together with the maximum bin counts used for the x and y axes.

    Parameters
    ----------
    data : object
        Input handed to ``maxbins``.
    x, y : field reference
        Fields binned along the horizontal and vertical axes.
    color : field reference or None
        Field aggregated into the cell colour. ``None`` switches to a plain
        record count (``count():Q``).
    opacity : optional
        When given it is added as an ``opacity`` encoding, and the dark hue
        scale is used instead of the light one.
    aggregate : str
        Aggregate applied to ``color``.
    height, width : int
        Chart size.

    Returns
    -------
    alt.Chart
    """
    data, nx, ny = maxbins(data)
    if color is None:
        color = ""
        aggregate = "count"
    color = alt.Color(
        # Formatting instead of ``+`` concatenation: the default ``color=2``
        # is an int and used to raise TypeError here.
        f"{aggregate}({color}):Q",
        scale=hue_scale_dark if opacity is not None else hue_scale_light,
    )
    # NOTE(review): the x/y defaults are ints too; confirm the installed
    # Altair version accepts non-string field references.
    enc_opt_args = dict(opacity=opacity) if opacity is not None else dict()
    return (alt.Chart(data, height=height, width=width).mark_rect().encode(
        x=alt.X(x, bin=alt.Bin(maxbins=nx)),
        y=alt.Y(y, bin=alt.Bin(maxbins=ny)),
        color=color,
        **enc_opt_args,
    ))
Пример #3
0
def violinplot(x=None, y=None, data=None, orient=None):
    """Draw a violin-style plot: centred, binned count areas per category.

    With ``orient`` of ``None`` or ``'v'`` the values in ``y`` are binned
    along the vertical axis and one violin is drawn per ``x`` category
    (faceted by column). Any other ``orient`` swaps the roles: ``x`` is
    binned horizontally and ``y`` supplies the categories (faceted by row).
    """
    # TODO: automatically infer orientation
    vertical = orient is None or orient == 'v'

    if vertical:
        count_channel, count_cls = 'x', alt.X
        value_channel, value_cls, value_field = 'y', alt.Y, y
        facet_channel, group_field = 'column', x
    else:
        count_channel, count_cls = 'y', alt.Y
        value_channel, value_cls, value_field = 'x', alt.X, x
        facet_channel, group_field = 'row', y

    # Centre-stacked counts give the mirrored violin outline.
    density = count_cls('count(*):Q',
                        axis=alt.Axis(grid=False, labels=False),
                        stack='center',
                        title='')
    values = value_cls(f'{value_field}:Q', bin=alt.Bin(maxbins=100))
    group = f'{group_field}:N'

    encodings = {
        count_channel: density,
        value_channel: values,
        facet_channel: group,
        'color': group,
    }
    return alt.Chart(data).mark_area().encode(**encodings)
Пример #4
0
def get_histogram_with_scatterplot(data, x_variable, y_variable):
    """Compose a scatter plot with marginal histograms on top and on the right.

    Points are coloured by the ``species`` field and sized by ``y_variable``.
    Both marginals are unstacked step areas, coloured by species, binned
    into at most 20 bins.
    """
    chart = alt.Chart(data)
    step_area = dict(opacity=.3, interpolate='step')

    def marginal(binned_cls, count_cls, field):
        # One marginal histogram: `field` is binned on `binned_cls`'s axis,
        # counts go on the other axis.
        return chart.mark_area(**step_area).encode(
            binned_cls(field + ':Q', bin=alt.Bin(maxbins=20), stack=None,
                       title=''),
            count_cls('count()', stack=None, title=''),
            alt.Color('species:N'),
        )

    scatter = chart.mark_circle().encode(
        alt.X(x_variable),
        alt.Y(y_variable),
        color='species',
        size=y_variable,
    )
    above = marginal(alt.X, alt.Y, x_variable).properties(height=200)
    beside = marginal(alt.Y, alt.X, y_variable).properties(width=200)

    return above & (scatter | beside)
Пример #5
0
def plot_heatmap(df, x_name='price'):
    """
    Plot a heatmap showing the average value of wines from popular grape
    varieties, binned either by price or by rating.

    Parameters:
    -----------
    df -- (pandas DataFrame) Cleaned data in a dataframe.
    x_name -- (str) Field on the x-axis: 'price' (only rows with
        price < 50 are plotted) or 'points'.

    Returns an altair plot object.

    Raises ValueError if x_name is neither 'price' nor 'points'.
    """
    # (x-axis title, chart title) for every supported x-axis field.
    axis_options = {
        'price': ("Price ($)",
                  "Average Value Scores for Popular Grape Varieties, by Price"),
        'points': ("Rating",
                   "Average Value Scores for Popular Grape Varieties, by Rating"),
    }
    # Fail early and clearly; previously an unsupported value fell through
    # both branches and surfaced as an UnboundLocalError at the return.
    if x_name not in axis_options:
        raise ValueError(
            f"x_name must be 'price' or 'points', got {x_name!r}")
    x_title, chart_title = axis_options[x_name]

    # register the custom theme under a chosen name
    alt.themes.register('vino_special', vino_special)

    # enable the newly registered theme
    alt.themes.enable('vino_special')

    varieties_chart_data = wrangle_varieties(df)
    if x_name == 'price':
        varieties_chart_data = varieties_chart_data.query('price < 50')

    return alt.Chart(varieties_chart_data).mark_rect().encode(
        x=alt.X(x_name + ':Q', bin=alt.Bin(maxbins=10), title=x_title),
        y=alt.Y('variety:O', title="Grape Variety"),
        color=alt.Color('average(value_scaled):Q',
                        scale=alt.Scale(scheme="bluepurple"),
                        legend=alt.Legend(orient='right',
                                          title="Average Value")),
        tooltip=[
            alt.Tooltip('average(points):Q', format='.2f'),
            alt.Tooltip('average(price)', format='$.2f'),
            alt.Tooltip('average(value_scaled)', format='.2f'),
            alt.Tooltip('count(title)')
        ]
    ).properties(
        title=chart_title
    ).configure_axis(grid=False, labelAngle=0)
Пример #6
0
def plot_model(X, y, model, predict_proba=False):
    """Plot a fitted model's predictions over a mesh spanning two features.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature frame; its first two columns define the mesh axes.
    y : pandas.DataFrame
        Target frame; its first column colours the scatter points.
    model : object
        Fitted estimator exposing ``predict`` (and ``predict_proba`` when
        ``predict_proba`` is true).
    predict_proba : bool
        If true, colour the mesh by the second column of
        ``model.predict_proba`` and draw no scatter layer. Otherwise colour
        the mesh by ``model.predict`` and overlay the samples.

    Returns
    -------
    alt.LayerChart
        Layered chart with enlarged axis and legend fonts.
    """
    # Join data for plotting
    sample = X.join(y)

    # Create a mesh for plotting: 50 steps per feature, padded by one step
    # on each side. ``.iloc`` is used because integer ``[]`` on a
    # label-indexed Series is a deprecated positional fallback in pandas.
    lower, upper = X.min(), X.max()
    step = (upper - lower) / 50
    step_1, step_2 = step.iloc[0], step.iloc[1]
    x1, x2 = np.meshgrid(
        np.arange(lower.iloc[0] - step_1, upper.iloc[0] + step_1, step_1),
        np.arange(lower.iloc[1] - step_2, upper.iloc[1] + step_2, step_2))

    # Store mesh in dataframe
    mesh_df = pd.DataFrame(np.c_[x1.ravel(), x2.ravel()], columns=['x1', 'x2'])

    def _with_large_fonts(*layers):
        # Shared final styling for both plot variants.
        return alt.layer(*layers).configure_axis(
            labelFontSize=20,
            titleFontSize=20
        ).configure_legend(
            titleFontSize=20,
            labelFontSize=20
        )

    # Mesh predictions
    if predict_proba:
        mesh_df['predictions'] = model.predict_proba(mesh_df[['x1', 'x2']])[:, 1]
        base_plot = alt.Chart(mesh_df).mark_rect(opacity=0.5).encode(
            x=alt.X('x1', bin=alt.Bin(step=step_1), axis=alt.Axis(title=X.columns[0])),
            y=alt.Y('x2', bin=alt.Bin(step=step_2), axis=alt.Axis(title=X.columns[1])),
            color=alt.Color('predictions', title='P(red)', scale=alt.Scale(scheme='blueorange'))
        ).properties(
            width=400,
            height=400
        )
        return _with_large_fonts(base_plot)

    mesh_df['predictions'] = model.predict(mesh_df[['x1', 'x2']])
    scat_plot = alt.Chart(sample).mark_circle(
        stroke='black',
        opacity=1,
        strokeWidth=1.5,
        size=100
    ).encode(
        x=alt.X(X.columns[0], axis=alt.Axis(labels=True, ticks=True, title=X.columns[0])),
        y=alt.Y(X.columns[1], axis=alt.Axis(labels=True, ticks=True, title=X.columns[1])),
        color=alt.Color(y.columns[0])
    )
    base_plot = alt.Chart(mesh_df).mark_rect(opacity=0.5).encode(
        x=alt.X('x1', bin=alt.Bin(step=step_1)),
        y=alt.Y('x2', bin=alt.Bin(step=step_2)),
        color=alt.Color('predictions', title='Legend')
    ).properties(
        width=400,
        height=400
    )
    return _with_large_fonts(base_plot, scat_plot)
Пример #7
0
    def initialize_chart(self):
        """Build the pre-binned bar chart and record equivalent Altair source.

        The first "measure" attribute of ``self.vis`` decides which channel
        ("x" or "y") carries the binned attribute; the other axis shows the
        "Number of Records" field. Source code that recreates the chart is
        appended to ``self.code``.

        Returns the Altair chart.

        NOTE(review): ``chart`` is only assigned for channels "x" and "y";
        any other channel value reaches ``return chart`` unassigned.
        """
        self.tooltip = False
        measure = self.vis.get_attr_by_data_model("measure",
                                                  exclude_record=True)[0]
        msr_attr = self.vis.get_attr_by_channel(measure.channel)[0]
        x_min = self.vis.min_max[msr_attr.attribute][0]
        x_max = self.vis.min_max[msr_attr.attribute][1]

        # Bar thickness: 12 scaled by the ratio of the spread of the values
        # in self.vis.data to the recorded min/max span.
        # NOTE(review): plot_range is 0 when x_min == x_max, so the division
        # below fails (or yields inf/nan for NumPy scalars) -- confirm
        # callers rule that out.
        x_range = abs(
            max(self.vis.data[msr_attr.attribute]) -
            min(self.vis.data[msr_attr.attribute]))
        plot_range = abs(x_max - x_min)
        markbar = x_range / plot_range * 12

        if measure.channel == "x":
            chart = (alt.Chart(self.data).mark_bar(size=markbar).encode(
                alt.X(
                    msr_attr.attribute,
                    title=f"{msr_attr.attribute} (binned)",
                    # binned=True: the data is treated as already binned.
                    bin=alt.Bin(binned=True),
                    type=msr_attr.data_type,
                    axis=alt.Axis(labelOverlap=True),
                    scale=alt.Scale(domain=(x_min, x_max)),
                ),
                alt.Y("Number of Records", type="quantitative"),
            ))
        elif measure.channel == "y":
            # NOTE(review): unlike the "x" branch (and unlike the generated
            # code string below) no ``type=`` is passed to alt.Y here --
            # confirm whether that is intentional.
            chart = (alt.Chart(self.data).mark_bar(size=markbar).encode(
                x=alt.X("Number of Records", type="quantitative"),
                y=alt.Y(
                    msr_attr.attribute,
                    title=f"{msr_attr.attribute} (binned)",
                    bin=alt.Bin(binned=True),
                    axis=alt.Axis(labelOverlap=True),
                    scale=alt.Scale(domain=(x_min, x_max)),
                ),
            ))
        #####################################
        ## Constructing Altair Code String ##
        #####################################

        # NOTE(review): the emitted snippet uses ``pd`` but only the altair
        # import is appended here; presumably the pandas import is written
        # elsewhere -- verify.
        self.code += "import altair as alt\n"
        # self.code += f"visData = pd.DataFrame({str(self.data.to_dict(orient='records'))})\n"
        self.code += f"visData = pd.DataFrame({str(self.data.to_dict())})\n"
        if measure.channel == "x":
            self.code += f"""
		chart = alt.Chart(visData).mark_bar(size={markbar}).encode(
		    alt.X('{msr_attr.attribute}', title='{msr_attr.attribute} (binned)',bin=alt.Bin(binned=True), type='{msr_attr.data_type}', axis=alt.Axis(labelOverlap=True), scale=alt.Scale(domain=({x_min}, {x_max}))),
		    alt.Y("Number of Records", type="quantitative")
		)
		"""
        elif measure.channel == "y":
            self.code += f"""
		chart = alt.Chart(visData).mark_bar(size={markbar}).encode(
		    alt.Y('{msr_attr.attribute}', title='{msr_attr.attribute} (binned)',bin=alt.Bin(binned=True), type='{msr_attr.data_type}', axis=alt.Axis(labelOverlap=True), scale=alt.Scale(domain=({x_min}, {x_max}))),
		    alt.X("Number of Records", type="quantitative")
		)
		"""
        return chart
Пример #8
0
def test_df_hexbin_C():
    """hexbin with ``C`` gives a rect chart binned on x/y, coloured by mean C."""
    gridsize = 10
    frame = pd.DataFrame({"x": range(10), "y": range(10), "C": range(10)})

    plot = frame.vgplot.hexbin(x="x", y="y", C="C", gridsize=gridsize)

    assert plot.mark == "rect"
    utils.check_encodings(plot, x="x", y="y", color="C")

    encoding = plot["encoding"]
    expected_bin = alt.Bin(maxbins=gridsize)
    for channel in ("x", "y"):
        assert encoding[channel]["bin"] == expected_bin
    assert encoding["color"]["aggregate"] == "mean"
Пример #9
0
def plot_heat(selected_state, axis, price_value, points_value):
    """Render a variety heatmap for the filtered wine data as HTML.

    Parameters
    ----------
    selected_state : str or list
        ``'select your state'`` keeps every state; a list keeps the listed
        states; any other value keeps the single matching state.
    axis : str
        ``'price'`` (x-axis and colour by price, rows with price < 100
        only) or ``'points'`` (x-axis and colour by rating).
    price_value, points_value : iterable of numbers
        Their min and max give the inclusive price / points ranges to keep.

    Returns
    -------
    str
        The chart serialised with ``to_html()``.

    Raises
    ------
    ValueError
        If ``axis`` is neither ``'price'`` nor ``'points'``.
    """
    # Per-axis texts: (x-axis title, legend title, chart title).
    axis_options = {
        'price': ("Price($)", "Average price",
                  "Average price for Popular Grape Varieties"),
        'points': ("Rating Score", "Average rating",
                   "Average rating for Popular Grape Varieties"),
    }
    # Previously an unsupported axis surfaced as an UnboundLocalError at the
    # return statement; reject it up front instead.
    if axis not in axis_options:
        raise ValueError(f"axis must be 'price' or 'points', got {axis!r}")
    x_title, legend_title, chart_title = axis_options[axis]

    # ``df`` is the module-level wine data frame.
    if selected_state == 'select your state':
        df_filtered = df
    elif isinstance(selected_state, list):
        df_filtered = df[df['state'].isin(selected_state)]
    else:
        df_filtered = df[df['state'] == selected_state]

    df_filtered = df_filtered[(df_filtered['price'] >= min(price_value))
                              & (df_filtered['price'] <= max(price_value))]
    df_filtered = df_filtered[(df_filtered['points'] >= min(points_value))
                              & (df_filtered['points'] <= max(points_value))]

    if axis == 'price':
        df_filtered = df_filtered.query('price < 100')

    heatmap = alt.Chart(df_filtered).mark_rect().encode(
        x=alt.X(axis + ':Q', bin=alt.Bin(maxbins=10), title=x_title),
        y=alt.Y('variety:O', title="Wine Variety"),
        color=alt.Color('average(' + axis + '):Q',
                        scale=alt.Scale(scheme="bluepurple"),
                        legend=alt.Legend(orient='right',
                                          title=legend_title)),
        tooltip=[
            alt.Tooltip('average(points):Q', format='.2f'),
            alt.Tooltip('average(price)', format='$.2f'),
            alt.Tooltip('average(value)', format='.2f'),
            alt.Tooltip('count(title)')
        ]
    ).properties(
        title=chart_title
    ).configure_axis(
        labelFontSize=12,
        titleFontSize=12,
        grid=False,
        labelAngle=0
    ).properties(width=300, height=300)

    return heatmap.to_html()
Пример #10
0
def _heatmap(table: alt.Data,
             *,
             is_ticks: bool,
             name: str,
             x_name: str,
             range_color: str,
             height: int = None,
             width: int = None,
             selection: alt.Selection = None,
             x_scale: alt.Scale = alt.Undefined,
             y_scale: alt.Scale = alt.Undefined,
             brush: alt.Selection = alt.Undefined) -> alt.Chart:
    """Build a 2-D count heatmap of the ``x`` / ``y`` fields of ``table``.

    Parameters
    ----------
    table : alt.Data
        Data with quantitative fields ``x`` and ``y``.
    is_ticks : bool
        When true, tick-mark strips are attached to the left of and below
        the heatmap; they carry the axis titles and the heatmap's own axis
        titles are blanked.
    name, x_name : str
        Titles for the y and x axes respectively.
    range_color : str
        Constant colour of the tick marks.
    height, width : int, optional
        Applied only when ``is_ticks`` is true.
    selection : alt.Selection, optional
        Added to the base chart when given.
    x_scale, y_scale : alt.Scale, optional
        Scales shared between the heatmap and the tick strips.
    brush : alt.Selection, optional
        Used as the bin ``extent`` on both axes.

    Returns
    -------
    alt.Chart
        The heatmap alone, or the concatenation with the tick strips.
    """
    base = alt.Chart(table)
    if selection is not None:
        base = base.add_selection(selection)

    if is_ticks:
        # The tick strips carry the titles instead.
        scat_x_title = ''
        scat_y_title = ''
    else:
        scat_x_title = x_name
        scat_y_title = name

    scat = base.mark_rect().encode(
        x=alt.X('x:Q',
                scale=x_scale,
                title=scat_x_title,
                bin=alt.Bin(maxbins=50, extent=brush)),
        y=alt.Y('y:Q',
                scale=y_scale,
                title=scat_y_title,
                bin=alt.Bin(maxbins=50, extent=brush)),
        color=alt.Color('count():Q', scale=alt.Scale(scheme='greenblue')))

    if is_ticks:
        tick_axis = alt.Axis(labels=False, domain=False, ticks=False)

        x_ticks = base.mark_tick().encode(
            x=alt.X('x:Q', axis=tick_axis, scale=x_scale, title=x_name),
            color=alt.value(range_color))

        # Fixed: the y channel was built with alt.X; use the matching
        # alt.Y channel class.
        y_ticks = alt.Chart(table).mark_tick().encode(
            y=alt.Y('y:Q', axis=tick_axis, scale=y_scale, title=name),
            color=alt.value(range_color))

        scat = scat.properties(width=width, height=height)
        x_ticks = x_ticks.properties(width=width)
        y_ticks = y_ticks.properties(height=height)
        scat = y_ticks | (scat & x_ticks)

    return scat
Пример #11
0
def plot_ratio(lengths_data, fullpath):
    """Plot coding-ratio histograms and a per-gene ratio line; save as HTML.

    Parameters
    ----------
    lengths_data : str or path-like
        Tab-separated file providing the columns ``codingRatio`` and
        ``geneNumber``.
    fullpath : str
        Output prefix. Three files are written:
        ``<fullpath>-ratioplot-full.html``, ``<fullpath>-ratioplot-zoom.html``
        and ``<fullpath>-ratioplot-genome.html``.

    Returns
    -------
    None
    """
    logging.info("Generating plots")

    lengths_data = pandas.read_csv(lengths_data, sep="\t")

    def ratio_axis():
        # X encoding shared by both histograms: bins of 0.1 over [0, 2].
        return altair.X('codingRatio:Q',
                        bin=altair.Bin(step=0.1),
                        scale=altair.Scale(domain=(0, 2)),
                        axis=altair.Axis(title='Query/Reference Ratio'))

    hist = altair.Chart(lengths_data)\
        .mark_bar(clip=True)\
        .encode(x=ratio_axis(),
                y='count()',
                tooltip='count()')\
        .configure_mark(
            fill='red',
            stroke='black')

    # Same histogram with the count axis capped at 100 to show small bins.
    histzoom = altair.Chart(lengths_data)\
        .mark_bar(clip=True)\
        .encode(x=ratio_axis(),
                y=altair.Y('count()',
                           scale=altair.Scale(domain=(0, 100))),
                tooltip='count()')\
        .configure_mark(
            fill=' #c658dd ',
            stroke='black')

    # Fixed: the upper bound used to be len('geneNumber'), i.e. the length
    # of the literal column *name* (always 10), which clipped the line to
    # the first ten genes. Use the number of rows in the column instead.
    gene_count = len(lengths_data['geneNumber'])
    genomeRatio = altair.Chart(lengths_data)\
        .mark_line(clip=True)\
        .encode(x=altair.X('geneNumber:Q',
                           scale=altair.Scale(domain=(0, gene_count))),
                y=altair.Y('codingRatio:Q',
                           scale=altair.Scale(type='log')),
                tooltip='codingRatio:Q')\
        .interactive()

    # save outputs
    logging.info("Saving image files to html")
    hist.save(fullpath + '-ratioplot-full' + '.html')
    histzoom.save(fullpath + '-ratioplot-zoom' + '.html')
    genomeRatio.save(fullpath + '-ratioplot-genome' + '.html')

    # PNG export is intentionally not performed here (the original kept it
    # disabled; it relied on a firefox webdriver).

    return None
Пример #12
0
    def initializeChart(self):
        """Build the pre-binned bar chart and record equivalent Altair source.

        The first "measure" attribute of ``self.view`` decides which channel
        ("x" or "y") carries the binned attribute; the other axis shows the
        "Count of Records" field. Source code that recreates the chart is
        appended to ``self.code``.

        Returns the Altair chart.

        NOTE(review): ``chart`` is only assigned for channels "x" and "y";
        any other channel value reaches ``return chart`` unassigned.
        """
        self.tooltip = False
        measure = self.view.getAttrByDataModel("measure",
                                               excludeRecord=True)[0]
        msrAttr = self.view.getAttrByChannel(measure.channel)[0]
        xMin = self.view.xMinMax[msrAttr.attribute][0]
        xMax = self.view.xMinMax[msrAttr.attribute][1]

        # Bar thickness: 12 scaled by the ratio of the spread of the values
        # in self.view.data to the recorded min/max span.
        # NOTE(review): plotRange is 0 when xMin == xMax, so the division
        # below fails (or yields inf/nan for NumPy scalars) -- confirm
        # callers rule that out.
        xRange = abs(
            max(self.view.data[msrAttr.attribute]) -
            min(self.view.data[msrAttr.attribute]))
        plotRange = abs(xMax - xMin)
        markbar = xRange / plotRange * 12

        if (measure.channel == "x"):
            chart = alt.Chart(self.data).mark_bar(size=markbar).encode(
                alt.X(msrAttr.attribute,
                      title=f'{msrAttr.attribute} (binned)',
                      # binned=True: the data is treated as already binned.
                      bin=alt.Bin(binned=True),
                      type=msrAttr.dataType,
                      axis=alt.Axis(labelOverlap=True),
                      scale=alt.Scale(domain=(xMin, xMax))),
                alt.Y("Count of Records", type="quantitative"))
        elif (measure.channel == "y"):
            # NOTE(review): unlike the "x" branch (and unlike the generated
            # code string below) no ``type=`` is passed to alt.Y here --
            # confirm whether that is intentional.
            chart = alt.Chart(self.data).mark_bar(size=markbar).encode(
                x=alt.X("Count of Records", type="quantitative"),
                y=alt.Y(msrAttr.attribute,
                        title=f'{msrAttr.attribute} (binned)',
                        bin=alt.Bin(binned=True),
                        axis=alt.Axis(labelOverlap=True),
                        scale=alt.Scale(domain=(xMin, xMax))))
        #####################################
        ## Constructing Altair Code String ##
        #####################################

        # NOTE(review): the emitted snippet sizes bars with ``markbar*3``
        # while the chart returned from this method uses ``markbar`` --
        # confirm the factor is deliberate. It also uses ``pd`` although
        # only the altair import is appended here.
        self.code += "import altair as alt\n"
        # self.code += f"viewData = pd.DataFrame({str(self.data.to_dict(orient='records'))})\n"
        self.code += f"viewData = pd.DataFrame({str(self.data.to_dict())})\n"
        if (measure.channel == "x"):
            self.code += f'''
		chart = alt.Chart(viewData).mark_bar(size={markbar*3}).encode(
		    alt.X('{msrAttr.attribute}', title='{msrAttr.attribute} (binned)',bin=alt.Bin(binned=True), type='{msrAttr.dataType}', axis=alt.Axis(labelOverlap=True), scale=alt.Scale(domain=({xMin}, {xMax}))),
		    alt.Y("Count of Records", type="quantitative")
		)
		'''
        elif (measure.channel == "y"):
            self.code += f'''
		chart = alt.Chart(viewData).mark_bar(size={markbar*3}).encode(
		    alt.Y('{msrAttr.attribute}', title='{msrAttr.attribute} (binned)',bin=alt.Bin(binned=True), type='{msrAttr.dataType}', axis=alt.Axis(labelOverlap=True), scale=alt.Scale(domain=({xMin}, {xMax}))),
		    alt.X("Count of Records", type="quantitative")
		)
		'''
        return chart
def main(data_path, file_path):
    """Create the price overview charts and save them as PNG files.

    Parameters
    ----------
    data_path : str or path-like
        CSV file read with its first column as the index. It must provide
        the columns ``price``, ``neighbourhood_cleansed`` and
        ``property_type``.
    file_path : str
        Directory receiving ``number_of_properties_by_price.png``,
        ``neighborhoods.png`` and ``price_by_property_type.png``.
    """
    data = pd.read_csv(data_path, index_col=0)

    # First plot: histogram of nightly prices, clipped to [0, 700].
    number_of_properties_by_price = alt.Chart(data).mark_bar(clip=True).encode(
        alt.X('price:Q',
              scale=alt.Scale(domain=(0, 700)),
              bin=alt.Bin(extent=[0, 700], step=25),
              title='Nightly price'),
        alt.Y('count()', title='No. of properties')
    ).properties(width=600, height=300, title='Number of properties by nightly price (between $0 and $700+)')

    number_of_properties_by_price.save(os.path.join(file_path, 'number_of_properties_by_price.png'))

    # Wrangling for the next two plots: attach a price-band label.
    # ``.copy()`` makes this an independent frame, so adding a column does
    # not write into a slice of ``data`` (SettingWithCopyWarning).
    price_data_labels = data[['price', 'neighbourhood_cleansed', 'property_type']].copy()
    price_data_labels['label'] = pd.cut(price_data_labels['price'], bins=[0, 100, 300, 500, 13000],
                                        include_lowest=True, labels=['low', 'mid', 'high', 'exceptional'])

    # Second plot: price bins per neighbourhood, coloured by count.
    Neighborhoods = alt.Chart(price_data_labels).mark_rect().encode(
        alt.X('neighbourhood_cleansed:N', title="Neighborhoods"),
        alt.Y('price:Q', bin=alt.Bin(extent=[0, 700], step=50), title="Nightly price ($ CAD)"),
        alt.Color('count()')
    ).properties(title="Price of property by neighborhood")

    # First and second plot side by side.
    concat_1_2 = (number_of_properties_by_price | Neighborhoods).configure_title(
        fontSize=20
    ).configure_axis(
        labelFontSize=13, titleFontSize=17
    ).configure_legend(
        labelFontSize=13, titleFontSize=15
    )

    concat_1_2.save(os.path.join(file_path, 'neighborhoods.png'))

    # Third plot: price bins per property type, coloured by count.
    price_by_property_type = alt.Chart(price_data_labels).mark_rect().encode(
        alt.X('price:Q', bin=alt.Bin(extent=[0, 700], step=50), title="Nightly price ($ CAD)"),
        alt.Y('property_type:N', title="Property Type"),
        alt.Color('count()')
    ).configure_title(
        fontSize=20
    ).configure_axis(
        labelFontSize=13, titleFontSize=17
    ).configure_legend(
        labelFontSize=13, titleFontSize=15
    )

    price_by_property_type.save(os.path.join(file_path, 'price_by_property_type.png'))
Пример #14
0
def render(table: alt.Data, *, height: int, width: int, height_minimap: int):
    """Stack a detail histogram of ``value`` above a brushable minimap.

    An x-interval brush is attached to the minimap; the detail view uses
    that brush both as its bin extent and as its x-scale domain. Both views
    use at most 30 bins and plot ``count()`` on the y axis.
    """
    brush = alt.selection_interval(encodings=['x'])
    histogram = alt.Chart(table).mark_bar().encode(y='count():Q')

    detail_x = alt.X('value:Q',
                     bin=alt.Bin(maxbins=30, extent=brush),
                     scale=alt.Scale(domain=brush))
    detail = histogram.encode(detail_x).properties(width=width, height=height)

    minimap_x = alt.X('value:Q', bin=alt.Bin(maxbins=30))
    minimap = histogram.encode(minimap_x).add_selection(brush)
    minimap = minimap.properties(width=width, height=height_minimap)

    return detail & minimap
Пример #15
0
def histogram2d(x, y, x_name='x', y_name='y'):
    """Return a 2-D histogram (rect heatmap) of ``x`` against ``y``.

    The frame to plot comes from ``build_histogram2d_dataframe``. The
    ``x_name`` field is binned into at most 60 bins, ``y_name`` into at most
    40, and each cell is coloured by its record count on the ``greenblue``
    scheme.
    """
    frame = build_histogram2d_dataframe(x, y, x_name, y_name)

    # A circle-mark variant sized by count() was tried by the original
    # author; the rect heatmap below is the one in use.
    x_bins = altair.X(x_name, bin=altair.Bin(maxbins=60))
    y_bins = altair.Y(y_name, bin=altair.Bin(maxbins=40))
    density = altair.Color('count()', scale=altair.Scale(scheme='greenblue'))

    return altair.Chart(frame).mark_rect().encode(x_bins, y_bins, density)
Пример #16
0
def generate_wage_histogram(data_frame, output_folder, file_name):
    """
    Build the 'Distribution of Wage' histogram and save it.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data to plot; must provide a ``Wage`` column
    output_folder : str
        output folder path to save the chart
    file_name : str
        file name for generated chart image

    Returns:
    -----------
    The result of ``save_altair_chart`` (described by the original author
    as the saved file path).
    """
    wage_axis = alt.X("Wage", bin=alt.Bin(maxbins=40), title='Wage(in K)')
    player_count = alt.Y('count()', title='Number of players')

    histogram = alt.Chart(data_frame).mark_bar().encode(x=wage_axis,
                                                        y=player_count)
    histogram = histogram.properties(title='Distribution of Wage')

    return save_altair_chart(histogram, output_folder, file_name)
Пример #17
0
def make_plot_4(year_range=(2013, 2016), animal="All"):
    """Histogram of intake age for the chosen years and animal type.

    Parameters
    ----------
    year_range : sequence of two ints
        Inclusive ``(first_year, last_year)`` filter on ``intake_year``.
        The default is a tuple so no mutable object is shared between calls.
    animal : str
        ``"All"`` keeps every animal type; any other value keeps only rows
        whose ``animal_type`` equals it (the title becomes ``animal + "s"``).

    Returns
    -------
    alt.Chart
        Bar chart of record counts per one-year age bin.
    """
    # Intake-year filter shared by both cases; reads the module-level
    # ``data_new4`` frame.
    selected = ((data_new4['intake_year'] >= year_range[0]) &
                (data_new4['intake_year'] <= year_range[1]))

    if animal == "All":
        title_string = "Animal"
    else:
        # Additionally restrict to the requested animal type.
        selected = selected & (data_new4['animal_type'] == animal)
        title_string = animal + "s"

    df4 = data_new4[selected]

    chart = alt.Chart(df4).mark_bar().encode(
        alt.X("age:Q", bin=alt.Bin(step=1), title="Age (years)"),
        alt.Y('count():Q', stack=None, title="Count"),
        tooltip=['count():Q', 'age:Q']
    ).properties(
        title=title_string + ' Intake Age Distribution',
        width=280,
        height=250
    ).configure_axisX(
        labelFontSize=12, titleFontSize=15, labelAngle=0
    ).configure_axisY(
        labelFontSize=12, titleFontSize=15
    ).configure_title(fontSize=18)
    return chart
def plot_normalization_comparison():
    """Serve a histogram comparing normalized and non-normalized ratings.

    Both result files are downloaded, tagged with a ``normal`` flag
    (1 = normalized, 0 = not normalized) and concatenated. A radio button
    bound to that flag chooses which set is shown; the row with
    ``NodeID`` 64 is always filtered out. The chart is displayed through
    its ``serve()`` method.
    """
    columns = ['NodeID', 'id']
    normalized = pd.read_csv(
        'https://raw.githubusercontent.com/jamescoller/multilayer_design_network_tool/master/results/id_normalized.csv',
        names=columns)
    not_normal = pd.read_csv(
        'https://raw.githubusercontent.com/jamescoller/multilayer_design_network_tool/master/results/id_not_normalized.csv',
        names=columns)

    # Long format: one frame, distinguished by the `normal` flag.
    combined = pd.concat([
        normalized.assign(normal=1),
        not_normal.assign(normal=0),
    ])

    norm_choice = alt.selection_single(
        fields=['normal'],
        bind=alt.binding_radio(options=[1, 0]),
        name='Normalized?')

    rating_axis = alt.X('id:Q',
                        bin=alt.Bin(maxbins=20),
                        title='Interdependency Rating')
    node_count = alt.Y('count()', title='Number of Nodes')

    histogram = alt.Chart(combined).mark_bar().encode(x=rating_axis,
                                                      y=node_count)
    histogram = histogram.add_selection(norm_choice)
    histogram = histogram.transform_filter(norm_choice)
    histogram = histogram.transform_filter(alt.datum.NodeID != 64)

    histogram.serve()
Пример #19
0
def plot_altair(hist, dist, dist_name, bin_size):
    """Overlay observed relative frequencies with a fitted distribution.

    Parameters
    ----------
    hist : sequence
        Values plotted as bars (column ``rf``).
    dist : sequence
        Values plotted as rules (column ``p``).
    dist_name : str
        Distribution name used in the chart title.
    bin_size : number
        Bin width; also scales the positional index to x-axis values.

    Returns
    -------
    alt.LayerChart
        600x500 layered chart of bars and rules.
    """
    # Build via orient='index' + transpose so inputs of unequal length are
    # padded (then filled with 0) instead of raising.
    data = pd.DataFrame.from_dict({
        'rf': hist,
        'p': dist
    }, orient='index').transpose().fillna(0).reset_index()

    data['index'] = data['index'] * bin_size

    base = alt.Chart(
        data, title=f'{dist_name} Estimation of EKA Goals').encode(
            alt.X(
                'index:Q',
                title='Goals Scored',
                bin=alt.Bin(step=bin_size),
                axis=alt.Axis(
                    values=[-5, 0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55
                            ])))

    bar = base.mark_bar(opacity=.7).encode(alt.Y('rf:Q'))

    # The rule layer replaces the binned x encoding with the raw index.
    rule = base.mark_rule(size=2).encode(
        alt.X('index:Q'),
        alt.Y('p:Q', title='Relative Frequency', axis=alt.Axis(tickCount=5)))

    return alt.layer(bar,
                     rule).properties(width=600, height=500).configure_axis(
                         titleFontSize=16).configure_title(fontSize=20)
Пример #20
0
    def hist(self, bins=None, stacked=None, orientation="vertical", **kwargs):
        """Histogram of every column, folded into one colour-coded chart.

        Parameters
        ----------
        bins : int, bool, alt.Bin or None
            An int is used as the maximum number of bins; ``None`` enables
            default binning; anything else is passed to Altair unchanged.
        stacked : optional
            Forwarded as the ``stack`` option of the count axis.
        orientation : {"vertical", "horizontal"}
            Axis along which the counts are drawn.
        **kwargs
            Mark options. ``subplots`` (truthy) facets the chart by column,
            using ``layout`` (default ``(-1, 1)``) for the column count.

        Raises
        ------
        ValueError
            If ``orientation`` is not "vertical" or "horizontal".
        """
        # Validate first so a bad argument fails before any data work.
        if orientation == "vertical":
            Indep, Dep = alt.X, alt.Y
        elif orientation == "horizontal":
            Indep, Dep = alt.Y, alt.X
        else:
            raise ValueError("orientation must be 'horizontal' or 'vertical'.")

        data = self._preprocess_data(with_index=False)

        if bins is None:
            bins = True
        elif isinstance(bins, int) and not isinstance(bins, bool):
            # bool is a subclass of int; bins=True/False must stay a plain
            # flag instead of becoming alt.Bin(maxbins=True).
            bins = alt.Bin(maxbins=bins)

        mark = self._get_mark_def({
            "type": "bar",
            "orient": orientation
        }, kwargs)
        chart = (alt.Chart(data, mark=mark).transform_fold(
            list(data.columns), as_=["column", "value"]).encode(
                Indep("value:Q", title=None, bin=bins),
                Dep("count()", title="Frequency", stack=stacked),
                color="column:N",
            ))

        if kwargs.get("subplots"):
            # Only the column count is needed for the faceted layout.
            _, ncols = _get_layout(data.shape[1],
                                   kwargs.get("layout", (-1, 1)))
            chart = chart.encode(
                facet=alt.Facet("column:N", title=None)).properties(
                    columns=ncols)

        return chart
Пример #21
0
def plot_number_of_eggs_by_age(age, count, xlim=(0, 50)):
    """Show the historical egg-count histogram for the user's age group.

    The age is mapped to an age group, the matching rows are taken from the
    HFEA array (first column compared with the age group, second column
    used as the egg count) and drawn as a histogram. A dashed red rule
    marks the user's own ``count``. The chart is rendered with
    ``st.altair_chart``; nothing is returned.

    ``xlim`` sets the x-scale domain; the bin extent is fixed at [0, 50].
    """
    age_group = map_age_to_age_group(age, encode=False)

    # NOTE: allow_pickle=True can execute arbitrary code from the file;
    # only safe because this is a bundled static asset.
    records = np.load('static/data/HFEA_age_and_eggs_collected.npy',
                      allow_pickle=True)
    in_group = records[:, 0] == age_group
    df = pd.DataFrame({'count': records[in_group, 1]})

    egg_axis = alt.X("count",
                     bin=alt.Bin(extent=[0, 50], step=1),
                     scale=alt.Scale(domain=xlim),
                     title='Number of eggs collected')
    cycle_axis = alt.Y('count()', title='Number of historical cycles')
    hist = alt.Chart(
        df,
        title=f'Historical Oocyte count for persons {age_group} years old.',
    ).mark_bar(opacity=0.75).encode(egg_axis, cycle_axis)

    # Vertical line highlighting the oocyte count entered by the user.
    marker = alt.Chart(pd.DataFrame({'x': [count]})).mark_rule(
        color='red', strokeWidth=2, strokeDash=[3, 2], opacity=0.5)
    vline = marker.encode(x='x:Q')

    st.altair_chart(hist + vline)
Пример #22
0
 def exportHistogram(histogram, path):
     """Save ``histogram`` as a bar chart at ``path``.

     Positions 0..len(histogram)-1 go on the x axis (binned, at most 100
     bins) and the histogram values on the y axis.
     """
     positions = range(len(histogram))
     frame = pd.DataFrame({'X': positions, 'Y': histogram})
     binned_x = alt.X('X', bin=alt.Bin(maxbins=100))
     alt.Chart(frame).mark_bar().encode(binned_x, y='Y').save(path)
Пример #23
0
def save_ensemble_residual_graphs(save_to, models, X, y):
    """Save a residual scatter plot and a residual histogram as PNG files.

    The residual is ``y`` minus the output of ``average_ensemble_models`` for
    ``models`` and ``X``. Two files are written under ``save_to``:
    ``ensemble_residual_plot.png`` and ``ensemble_residual_distribution.png``.

    Args:
        save_to: Directory path (must be a ``str``) the PNGs are written to.
        models: Passed through to ``average_ensemble_models``.
        X: Passed through to ``average_ensemble_models``.
        y: True prices; also used as the x-axis of the scatter plot.
    """
    assert isinstance(save_to, str)

    residuals = y - average_ensemble_models(models, X)
    frame = pd.DataFrame({
        'true_price': y,
        'average_ensemble_residual': residuals,
    })

    # Residual against true price.
    scatter = alt.Chart(frame).mark_circle(size=30, opacity=0.4).encode(
        x=alt.X('true_price', title='Price'),
        y=alt.Y('average_ensemble_residual',
                title='Average ensembling residual'),
    ).properties(
        width=850,
        height=500,
        title='Average Ensembling Residuals on Test Data',
    )

    # Distribution of the residuals in fixed-width bins.
    distribution = alt.Chart(frame).mark_bar().encode(
        x=alt.X('average_ensemble_residual',
                title='Average ensembling residual',
                bin=alt.Bin(extent=[-1200, 2000], step=5)),
        y='count()',
    ).properties(
        width=850,
        height=500,
        title='Ensembling Residual Distribution',
    )

    with alt.data_transformers.enable('default'):
        scatter.save(f'{save_to}/ensemble_residual_plot.png')
        distribution.save(f'{save_to}/ensemble_residual_distribution.png')
Пример #24
0
	def initializeChart(self):
		"""Build a bar chart from the x and y channels of ``self.dobj``.

		Sets ``self.tooltip`` to False. When the y channel's column name is
		``"count()"`` the x field is treated as quantitative and binned;
		otherwise the y field is treated as quantitative and binned. Both
		cases use at most 50 bins.

		Returns:
			The Altair chart built on ``self.dataURL``.
		"""
		self.tooltip = False
		xAttr = self.dobj.getObjFromChannel("x")[0].columnName
		yAttr = self.dobj.getObjFromChannel("y")[0].columnName

		binning = alt.Bin(maxbins=50)
		if yAttr == "count()":
			x_enc = alt.X(xAttr, type="quantitative", bin=binning)
			y_enc = alt.Y(yAttr)
		else:
			x_enc = alt.X(xAttr)
			y_enc = alt.Y(yAttr, type="quantitative", bin=binning)

		return alt.Chart(self.dataURL).mark_bar().encode(x_enc, y_enc)
Пример #25
0
def plot_altair_2(xcol):
    """Return the HTML of an interactive histogram of ``xcol`` from ``wine``.

    The column is treated as quantitative, binned with at most 30 bins, and
    the bars are coloured by the ``Taste`` field.
    """
    binned_x = alt.X(xcol, type='quantitative', bin=alt.Bin(maxbins=30))
    bars = alt.Chart(wine).mark_bar().encode(
        x=binned_x,
        y=alt.Y('count()'),
        color='Taste',
    )
    return bars.interactive().to_html()
Пример #26
0
def histogram(values: np.ndarray,
              x_label: str = "Score",
              y_label: str = "Number of Documents",
              x_scale: str = "linear",
              y_scale: str = "linear",
              max_bins: int = 100):
    """Build a histogram chart of values.

    This can be really useful for debugging the lengths of documents.

    Args:
        values: A numpy array of quantitative values.
        x_label: A label for the x-axis. It is also used as the name of the
            data column that holds ``values``.
        y_label: A label for the y-axis.
        x_scale: A continuous scale type, defined by `altair <https://altair-viz.github.io/user_guide/generated/core/altair.Scale.html>`_.
        y_scale: A continuous scale type, defined by `altair <https://altair-viz.github.io/user_guide/generated/core/altair.Scale.html>`_.
        max_bins: The maximum number of histogram bins.

    Returns:
        The Altair bar chart; the caller is responsible for displaying it.
    """
    # Pass field and type explicitly instead of building a "<label>:Q"
    # shorthand string: a label such as "count(tokens)" would otherwise be
    # parsed as an aggregate rather than as a column name.
    x = alt.X(field=x_label,
              type="quantitative",
              bin=alt.Bin(maxbins=max_bins),
              title=x_label,
              scale=alt.Scale(type=x_scale))
    y = alt.Y("count()", title=y_label, scale=alt.Scale(type=y_scale))
    data = pd.DataFrame({x_label: values})
    return alt.Chart(data).mark_bar().encode(x=x, y=y)
Пример #27
0
def criar_histograma(coluna, df, sliderH):
    """Build an interactive 600x600 histogram of column ``coluna`` of ``df``.

    ``sliderH`` is passed as the ``base`` of the bin definition, whose extent
    is fixed to [1, 40000]. The tooltip shows the column and the bar count.
    """
    # NOTE(review): ``base`` is the number base used for automatic binning;
    # confirm the slider is not meant to drive ``maxbins`` or ``step``.
    bin_spec = alt.Bin(base=sliderH, extent=[1, 40000])
    bars = alt.Chart(df).mark_bar().encode(
        x=alt.X(coluna, bin=bin_spec),
        y='count()',
        tooltip=[coluna, 'count()'],
    )
    return bars.properties(width=600, height=600).interactive()
Пример #28
0
def get_wpm_plot(df):
    """Build a histogram of words-per-minute, coloured by participant group.

    Works on a copy of ``df``. Two columns are added to the copy before
    plotting: ``likert_var`` (row-wise variance of the five rating columns)
    and ``group`` (``'Amazon'`` where ``Start Date`` is before
    ``"2021-04-05"``, otherwise ``'XLab'``).

    Args:
        df: Frame with ``wpm``, ``Start Date`` and the rating columns
            ``Interest``, ``Effective``, ``Intelligence``, ``Writing``,
            ``Meet``.

    Returns:
        The configured Altair chart.
    """
    df2 = df.copy()
    df2['likert_var'] = np.var(
        df2[['Interest', 'Effective', 'Intelligence', 'Writing', 'Meet']],
        axis=1)
    df2['group'] = 'XLab'
    df2.loc[(df2['Start Date'] < "2021-04-05"), 'group'] = 'Amazon'

    group_color = alt.Color(
        'group:N',
        scale=alt.Scale(range=[berkeley_palette['berkeley_blue'],
                               berkeley_palette['california_gold']]),
        legend=alt.Legend(title="Participant Group", padding=10,
                          symbolType="square", symbolStrokeWidth=1,
                          orient="right", offset=-170))

    bars = alt.Chart(df2).mark_bar(
        opacity=0.8, stroke=berkeley_palette['black'], strokeWidth=0.5,
    ).encode(
        x=alt.X('wpm:Q', bin=alt.Bin(maxbins=100),
                title="Words per Minute (bin=100)"),
        y=alt.Y('count()', title='Frequency'),
        color=group_color,
    ).properties(
        height=300, width=650,
        title={'text': 'Distribution of Response Time',
               'subtitle': 'Evaluated in Words per Minute'})

    # Each configure_title() call replaces the whole title config, so the
    # anchor and dy settings must be given together; two separate calls
    # would silently discard the anchor.
    p = (bars
         .configure(padding={'top': 20, 'left': 20, 'right': 20, 'bottom': 20})
         .configure_facet(spacing=10)
         .configure_view(stroke=None)
         .configure_title(anchor='middle', dy=-5)
         .configure_axis(grid=False))

    return p
def bar_chart(x1, y1, data):
    """Build a 500x300 bar chart of ``y1`` against binned ``x1``.

    Both ``x1`` and ``y1`` are converted to strings before being used as
    encodings; the x-axis is binned with at most 100 bins. Bars are thin,
    red, and semi-transparent.
    """
    binned_x = alt.X(str(x1), bin=alt.Bin(maxbins=100))
    bars = alt.Chart(data, width=500, height=300).mark_bar(
        color='red', size=10, opacity=0.3)
    return bars.encode(x=binned_x, y=str(y1))
Пример #30
0
 def plot_predictions(self, altair_config: Dict[str, Any],
                      **kwargs) -> alt.Chart:
     """Build a histogram of ``self.y_pred`` highlighting a probability range.

     Predictions are binned in steps of 0.01 on a fixed [0, 1] axis. Bars are
     coloured by whether the prediction lies between ``p_min`` and ``p_max``.

     Args:
         altair_config: Must provide ``"scheme"`` (colour scheme name) and
             ``"title_config"`` (keyword arguments for ``configure_title``).
         **kwargs: Must contain ``p_min`` and ``p_max``.

     Returns:
         The configured Altair chart.
     """
     lower, upper = kwargs["p_min"], kwargs["p_max"]

     frame = pd.DataFrame(data={"target": 1, "prediction": self.y_pred})
     frame["focus"] = frame["prediction"].between(lower, upper)

     x_enc = alt.X(
         "prediction:Q",
         bin=alt.Bin(step=0.01),
         scale=alt.Scale(domain=(0, 1)),
         title="Predicted Probability of class 1",
     )
     focus_color = alt.Color(
         "focus:N",
         legend=None,
         scale=alt.Scale(scheme=altair_config["scheme"]),
     )

     bars = alt.Chart(frame).mark_bar().encode(
         x_enc,
         y=alt.Y("count()", title="Number of Predictions"),
         color=focus_color,
         tooltip=["target"],
     )
     sized = bars.properties(
         width="container",
         height=300,
         title="Distribution of Model Predictions",
     )
     return sized.configure_title(**altair_config["title_config"])