示例#1
0
def test_complex_df_report():
    """Publish a report built from several differently shaped DataFrames.

    The report holds four tables: a frame parsed from CSV with ``date``,
    ``datetime`` and ``datetime_tz`` columns, a small mixed-type frame, the
    ``describe()`` summary of that frame, and the same summary with its index
    reset. Publishing happens inside the ``deletable`` context manager.
    """
    # The CSV literal is handed to the parser as-is, indentation included.
    timestamps_df = convert_csv_pd("""
        date,datetime,datetime_tz
        2017-01-10,2017-01-21T23:10:24,2020-03-23T00:00:00.000Z
        2017-01-11,2017-01-23T23:01:24,2020-04-23T00:00:00.000Z
    """)

    people = {
        "first_name": ["Jason", "Molly", "Tina", "Jake", "Amy"],
        "last_name": ["Miller", "Jacobson", "Ali", "Milner", "Cooze"],
        "age": [42, 52, 36, 24, 73],
        "preTestScore": [4, 24, 31, 2, 3],
        "postTestScore": [25, 94, 57, 62, 70],
    }
    # Column order is passed explicitly; it matches the dict's key order.
    people_df = pd.DataFrame(people, columns=list(people))
    summary_df = people_df.describe()
    summary_flat_df = summary_df.reset_index()

    frames = (timestamps_df, people_df, summary_df, summary_flat_df)
    tables = [dp.Table(frame) for frame in frames]

    with deletable(dp.Report(*tables)) as dp_report:
        dp_report.publish(name=gen_name())
示例#2
0
def gen_report_with_files(datadir: Path,
                          single_file: bool = False) -> dp.Report:
    """Build a sample report covering markdown, file, plot and table blocks.

    Args:
        datadir: Directory that contains ``datapane-logo.png``.
        single_file: When true, return a report whose only content is a
            ``dp.Blocks`` holding the markdown and plot blocks; otherwise
            return a report with all six blocks.

    Returns:
        The assembled ``dp.Report``.
    """
    numbers = [1, 2, 3]
    big_frame = gen_df(10000)

    # Every block is constructed up front, whichever variant is returned.
    md_block = dp.Markdown(
        text="# Test markdown block <hello/> \n Test **content**")
    list_asset = dp.File(data=numbers, name="List Asset", is_json=True)
    img_asset = dp.File(file=datadir / "datapane-logo.png")

    line_chart = alt.Chart(gen_df()).mark_line().encode(x="x", y="y")
    plot_asset = dp.Plot(data=line_chart, caption="Plot Asset")

    df_asset = dp.Table(df=big_frame, caption="Test Dataframe Table")
    pivot_asset = dp.Table(df=big_frame,
                           caption="Test Dataframe PivotTable",
                           can_pivot=True)

    if single_file:
        return dp.Report(dp.Blocks([md_block, plot_asset]))

    all_blocks = (list_asset, img_asset, df_asset, md_block, plot_asset,
                  pivot_asset)
    return dp.Report(*all_blocks)
示例#3
0
def test_report(tmp_path: Path):
    """Publish a six-block report and check the metadata it reports back.

    Args:
        tmp_path: Temporary directory in which the JSON fixture file is written.
    """
    frame = gen_df()
    name = gen_name()
    description = gen_description()
    source_url = "https://github.com/datapane/datapane"

    # Plain markdown block
    markdown = dp.Markdown("hello world!!")

    # Raw inputs for the asset blocks
    numbers = [1, 2, 3]
    numbers_json: str = json.dumps(numbers)
    line_chart = alt.Chart(frame).mark_line().encode(x="x", y="y")

    # The same JSON payload is supplied once as a file on disk ...
    json_path = tmp_path / "json_list.json"
    json_path.write_text(data=numbers_json)
    file_asset = dp.File(file=json_path)
    # ... once as an already-serialised string, and once as a Python list.
    json_asset = dp.File(data=numbers_json, is_json=True)
    plot_asset = dp.Plot(data=line_chart)
    list_asset = dp.File(data=numbers, is_json=True)
    df_asset = dp.Table(df=frame, caption="Our Dataframe")

    report_blocks = (markdown, file_asset, df_asset, json_asset, plot_asset,
                     list_asset)
    dp_report = api.Report(*report_blocks)
    dp_report.publish(name=name,
                      description=description,
                      source_url=source_url)

    with deletable(dp_report):
        # The published report must echo back what was sent.
        check_name(dp_report, name)
        assert dp_report.description == description
        assert dp_report.source_url == source_url
        assert len(dp_report._top_block.blocks[0].blocks) == 6
示例#4
0
def test_report(tmp_path: Path):
    """Publish a six-block report and check its headline and block count.

    Args:
        tmp_path: Temporary directory in which the JSON fixture file is written.
    """
    frame = gen_df()
    name = gen_name()
    headline = gen_headline()

    # Plain markdown block
    markdown = dp.Markdown("hello world!!")

    # Raw inputs for the asset blocks
    numbers = [1, 2, 3]
    numbers_json: str = json.dumps(numbers)
    line_chart = alt.Chart(frame).mark_line().encode(x="x", y="y")

    # The same JSON payload is supplied as a file, as a string and as a list.
    json_path = tmp_path / "json_list.json"
    json_path.write_text(data=numbers_json)
    file_asset = dp.File(file=json_path)
    json_asset = dp.File(data=numbers_json, is_json=True)
    plot_asset = dp.Plot(data=line_chart)
    list_asset = dp.File(data=numbers, is_json=True)
    df_asset = dp.Table(df=frame, caption="Our Dataframe")

    report_blocks = (markdown, file_asset, df_asset, json_asset, plot_asset,
                     list_asset)
    dp_report = api.Report(*report_blocks)
    dp_report.publish(name=name, headline=headline)

    with deletable(dp_report):
        # The published report must echo back what was sent.
        assert dp_report.headline == headline
        assert len(dp_report.top_block.blocks) == 6
def LoadReport(title, X, Y, prediccion, anios):
    """Build a Datapane report with a metrics table and a line chart.

    The table has a single row: the predicted values followed by the MAPE
    (as a fraction, not a percentage), MAE and RMSE of ``prediccion``
    against ``Y``. The chart plots three series over consecutive years
    starting at 1998: ``X`` ('Datos de entrenamiento'), the last
    ``anios + 1`` values of ``X`` followed by ``Y`` ('Datos de prueba'), and
    the last ``anios + 1`` values of ``X`` followed by ``prediccion``
    ('Datos predichos').

    Args:
        title (str): Title applied to the chart.
        X: Observed values plotted as the training series; must support
            ``len`` and iteration.
        Y: Observed values the prediction is scored against; must support
            element-wise arithmetic with ``prediccion`` (e.g. numpy arrays).
        prediccion: Predicted values, same length as ``Y``.
        anios (int): Number of predicted years.
            NOTE(review): the year axis is sized from ``len(Y)`` while the
            values are sliced with ``anios``, so this presumably has to equal
            ``len(Y)`` -- confirm against callers.

    Returns:
        datapane.Report: Report holding the metrics table and the chart.
    """
    n_train = len(X)
    n_tail = len(Y) + 1
    window = anios + 1

    # Year axis: the training span, then the tail span twice (test + predicted).
    # The tail starts on the last training year so the lines join up.
    train_years = list(range(1998, 1998 + n_train))
    tail_years = list(range(1997 + n_train, 1997 + n_train + n_tail))
    years = train_years + tail_years + tail_years

    labels = (['Datos de entrenamiento'] * n_train
              + ['Datos de prueba'] * n_tail
              + ['Datos predichos'] * n_tail)

    observed = list(X)
    students = (observed
                + (observed + list(Y))[-window:]
                + (observed + list(prediccion))[-window:])

    df = pd.DataFrame(data={
        'Serie': labels,
        'Año': years,
        'Alumnos': students
    })

    # Error metrics, stored as float64 in the order MAPE, MAE, RMSE.
    error = prediccion - Y
    scores = np.array([
        np.abs(error / Y).mean(),
        np.abs(error).mean(),
        np.sqrt(np.square(error).mean()),
    ], dtype=float)

    header = ['Año %d' % (i) for i in range(1, anios + 1)]
    header += ['MAPE', 'MAE', 'RMSE']
    metrics_row = np.array([list(prediccion) + list(scores)])
    metrics_table = pd.DataFrame(metrics_row, columns=header)

    # Build the chart
    base = alt.Chart(df).mark_line()
    encoded = base.encode(x='Año', y='Alumnos', color='Serie')
    chart = encoded.mark_line(point=True).interactive().properties(title=title)

    # Build the report
    return dp.Report(dp.Table(metrics_table), dp.Plot(chart))
示例#6
0
def gen_report_complex_with_files(datadir: Path,
                                  single_file: bool = False,
                                  local_report: bool = False) -> dp.Report:
    """Build a sample report exercising text, number, asset and table blocks.

    Args:
        datadir: Directory that contains ``datapane-logo.png``.
        single_file: When true, return a report with one ``dp.Group`` holding
            the markdown block and the data-table block; otherwise return a
            two-page report with every block.
        local_report: When true, reuse the basic table in place of a
            ``dp.DataTable``.

    Returns:
        The assembled ``dp.Report``.
    """
    numbers = [1, 2, 3]
    small_frame = gen_df()
    big_frame = gen_df(10000)

    # Text blocks.
    # NOTE(review): ``md_block`` is not created here; presumably it is a
    # module-level block defined elsewhere in this file -- confirm.
    html_block = dp.HTML(html="<h1>Hello World</h1>")
    html_block_1 = dp.HTML(html=h2("Hello World"))
    code_block = dp.Code(code="print('hello')", language="python")
    formula_block = dp.Formula(formula=r"\frac{1}{\sqrt{x^2 + 1}}")
    big_number = dp.BigNumber(heading="Tests written", value=1234)
    big_number_1 = dp.BigNumber(heading="Real Tests written :)",
                                value=11,
                                change=2,
                                is_upward_change=True)
    embed_block = dp.Embed(url="https://www.youtube.com/watch?v=JDe14ulcfLA")

    # Asset blocks
    plot_asset = dp.Plot(data=gen_plot(), caption="Plot Asset")
    list_asset = dp.File(data=numbers, filename="List Asset", is_json=True)
    img_asset = dp.File(file=datadir / "datapane-logo.png")

    # Table blocks
    table_asset = dp.Table(data=small_frame, caption="Test Basic Table")
    if local_report:
        # local reports don't support DataTable
        dt_asset = table_asset
    else:
        dt_asset = dp.DataTable(df=big_frame, caption="Test DataTable")

    if single_file:
        return dp.Report(dp.Group(blocks=[md_block, dt_asset]))

    text_tabs = dp.Select(md_block,
                          html_block,
                          html_block_1,
                          code_block,
                          formula_block,
                          embed_block,
                          type=dp.SelectType.TABS)
    numbers_row = dp.Group(big_number, big_number_1, columns=2)
    first_page = dp.Page(text_tabs, numbers_row)
    second_page = dp.Page(
        plot_asset,
        list_asset,
        img_asset,
        table_asset,
        dt_asset,
    )
    return dp.Report(first_page, second_page)
 def discrim_html_output_datapane(self, ProcDiscrim, fileName):
     """Write an HTML report for the PROC DISCRIM method using datapane.

     Parameters
     ----------
     ProcDiscrim : LinearDiscriminantAnalysis object
         Object obtained after calling fit() on the
         LinearDiscriminantAnalysis class.
     fileName : str
         Name of the output file (with or without the .html extension).
     """
     if not fileName.endswith(".html"):
         fileName += ".html"

     # Run the statistics steps before their result attributes are read below.
     ProcDiscrim._stats_dataset()
     ProcDiscrim._stats_classes()
     ProcDiscrim._stats_pooled_cov_matrix()
     ProcDiscrim._stats_wilks()

     sections = [
         dp.Text("# Linear Discriminant Analysis"),
         dp.Text("## General information about the data"),
         dp.Table(ProcDiscrim.infoDataset),
         dp.Table(ProcDiscrim.infoClasses),
         dp.Text("## Informations on the covariance matrix"),
         dp.Table(ProcDiscrim.W),
         dp.Table(ProcDiscrim.infoCovMatrix),
         dp.Text("## Function of lda and its' intercept "
                 "and coefficients"),
         dp.Table(ProcDiscrim.infoFuncClassement),
         dp.Text("## Statistics. Wilks' Lambda"),
         dp.Table(ProcDiscrim.infoWilksStats),
     ]
     report = dp.Report(*sections)

     report.save(path=fileName)
示例#8
0
文件: test_api.py 项目: cxz/datapane
def gen_report_with_files(datadir: Path,
                          single_file: bool = False) -> dp.Report:
    """Build a sample report covering text, number, asset and table blocks.

    Args:
        datadir: Directory that contains ``datapane-logo.png``.
        single_file: When true, return a report whose only content is a
            ``dp.Blocks`` holding the markdown and plot blocks; otherwise
            return a report with all ten blocks.

    Returns:
        The assembled ``dp.Report``.
    """
    numbers = [1, 2, 3]
    small_frame = gen_df()
    big_frame = gen_df(10000)

    # Text and number blocks
    md_block = dp.Markdown(
        text="# Test markdown block </hello> \n Test **content**")
    html_block = dp.HTML(html="Hello World</hello>")
    big_number = dp.BigNumber(heading="Tests written", value=1234)
    big_number_1 = dp.BigNumber(heading="Real Tests written :)",
                                value=11,
                                change=2,
                                is_upward_change=True)

    # Asset blocks
    line_chart = alt.Chart(gen_df()).mark_line().encode(x="x", y="y")
    plot_asset = dp.Plot(data=line_chart, caption="Plot Asset")
    list_asset = dp.File(data=numbers, name="List Asset", is_json=True)
    img_asset = dp.File(file=datadir / "datapane-logo.png")

    # Table blocks
    table_asset = dp.Table(data=small_frame, caption="Test Basic Table")
    dt_asset = dp.DataTable(df=big_frame, caption="Test DataTable")
    dt_pivot_asset = dp.DataTable(df=big_frame,
                                  caption="Test DataTable with Pivot",
                                  can_pivot=True)

    if single_file:
        return dp.Report(dp.Blocks(blocks=[md_block, plot_asset]))

    all_blocks = (
        md_block,
        html_block,
        big_number,
        big_number_1,
        plot_asset,
        list_asset,
        img_asset,
        table_asset,
        dt_asset,
        dt_pivot_asset,
    )
    return dp.Report(*all_blocks)
示例#9
0
"""{{ name }} script"""
import pandas as pd
import datapane as dp

# TODO - enter your code here...
sample_data = {"x": [4, 3, 2, 1], "y": [10.5, 20.5, 30.5, 40.5]}
df = pd.DataFrame.from_dict(sample_data)

# Create your datapane report components: a markdown heading and the table
report = dp.Report(
    dp.Markdown("#### **Sample** Markdown block"),
    dp.Table(df),
)
report.publish(name="my_report")
示例#10
0
import pandas as pd
import datapane as dp

# Basic report creation; the markdown text is filled from the "p1" parameter.
sample_data = {"x": [4, 3, 2, 1], "y": [10.5, 20.5, 30.5, 40.5]}
df = pd.DataFrame.from_dict(sample_data)
blocks = [
    dp.Markdown(f"Dummy Markdown block - {dp.Params['p1']}"),
    dp.Table(df),
]

# Report how the script is being run: on datapane and/or by the datapane runner.
if dp.on_datapane:
    print("on datapane")
if __name__ == "__datapane__":  # same as dp.by_datapane
    print("by datapane")
    # The report is only built and published when run by the datapane runner.
    report = dp.Report(blocks=blocks)
    report.publish(name="dp_report", headline="My Report")
示例#11
0
"""{{ name }} script"""
import pandas as pd
import datapane as dp

# TODO - enter your code here...
sample_data = {"x": [4, 3, 2, 1], "y": [10.5, 20.5, 30.5, 40.5]}
df = pd.DataFrame.from_dict(sample_data)

# Create your datapane report components: a markdown heading and the table
report = dp.Report(
    dp.Markdown("#### **Sample** Markdown block"),
    dp.Table(df),
)
report.publish(name="my_report")
示例#12
0
)

# File blocks: the same list as in-line JSON and as a downloadable file,
# plus an image loaded from disk.
list_asset = dp.File(data=lis, is_json=True)
file_asset = dp.File(data=lis)
img_asset = dp.File(file=Path("./datapane-logo.png"))

# Vega plot
vega_asset = dp.Plot(data=gen_plot())

# Tables: a basic table and a data table over a larger frame
df_table_asset = dp.Table(gen_df())
df_datatable_asset = dp.DataTable(gen_df(10000))

# Matplotlib figure: two curves over sorted random x values.
# The seed is fixed so the figure is reproducible.
np.random.seed(19680801)
xdata = np.random.random([2, 10])
# Each row is a view into xdata, so the in-place sorts below also sort xdata.
xdata1, xdata2 = xdata
xdata1.sort()
xdata2.sort()
ydata1 = xdata1**2
ydata2 = 1 - xdata2**3
mpl_fig = plt.figure(figsize=(15, 15))
ax = mpl_fig.add_subplot(1, 1, 1)
for _xs, _ys, _color in ((xdata1, ydata1, 'tab:blue'),
                         (xdata2, ydata2, 'tab:orange')):
    ax.plot(_xs, _ys, color=_color)
示例#13
0
"""{{ name }} report"""
import altair as alt
import pandas as pd
import datapane as dp

# Download the dataset and average the smoothed case rate per continent and date
dataset = pd.read_csv(
    "https://covid.ourworldindata.org/data/owid-covid-data.csv")
_by_continent_date = dataset.groupby(["continent", "date"])
df = _by_continent_date["new_cases_smoothed_per_million"].mean().reset_index()

# Build an Altair area chart with one unstacked, translucent area per continent
_encodings = dict(
    x='date:T',
    y=alt.Y('new_cases_smoothed_per_million:Q', stack=None),
    color=alt.Color('continent:N', scale=alt.Scale(scheme='set1')),
    tooltip='continent:N',
)
_area = alt.Chart(df).mark_area(opacity=0.4, stroke='black')
plot = _area.encode(**_encodings).interactive().properties(width='container')

# Embed data and plot into a Datapane report and publish
report = dp.Report("## Covid data per continent", dp.Plot(plot), dp.Table(df))
report.publish(
    name="Covid Demo {{ name }}",
    description=(
        "Plot of Covid infections per continent, using data from ourworldindata"
    ),
    open=True,
)
示例#14
0
import pandas as pd
import altair as alt
import datapane as dp
# Download the dataset and average the smoothed case rate per continent and date
dataset = pd.read_csv(
    'https://covid.ourworldindata.org/data/owid-covid-data.csv')
_by_continent_date = dataset.groupby(['continent', 'date'])
df = _by_continent_date['new_cases_smoothed_per_million'].mean().reset_index()

# Area chart with one unstacked, translucent area per continent
_area = alt.Chart(df).mark_area(opacity=0.4, stroke='black')
plot = _area.encode(
    x='date:T',
    y=alt.Y('new_cases_smoothed_per_million:Q', stack=None),
    color=alt.Color('continent:N', scale=alt.Scale(scheme='set1')),
    tooltip='continent:N',
).interactive().properties(width='container')

# Publish the chart together with the underlying table
_report = dp.Report(dp.Plot(plot), dp.Table(df))
_report.publish(name='covid_report', open=True)
示例#15
0
    + [に、へ、で](#-----)
        * [Common Heuristics](#common-heuristics-4)
        * [Insights](#insights-4)
    + [ね、よ、わ、さ、な、ん](#-----------)
        * [Common Heuristics](#common-heuristics-5)
        * [Insights](#insights-5)


                 """),
    dp.Markdown("""
## Dataset
The corpora used for the current project can be found [here](https://www.kaggle.com/bryanpark/japanese-single-speaker-speech-dataset), [here](https://www.kaggle.com/alvations/tatoeba), and [here](https://www.kaggle.com/nltkdata/knb-corpus). They've been processed via the [Ginza](https://github.com/megagonlabs/ginza) library, which is based on [SudachiPy](https://github.com/WorksApplications/SudachiPy) and [spaCy](https://spacy.io/). These corpora represent a mix of transcribed speech, translated example sentences, and blog articles.

### Full NLP for All Particles
>*Linguistic attributes for all tokens tagged as any sort of particle*
                 """), dp.Table(particle_df),
    dp.Markdown("""
### Relative Frequency for All Particles
>*Frequency table for all particles, including counts, percentages, and cumulative statistics*
                 """), dp.Table(particle_stb),
    dp.Markdown("""
---

## Particle Comparisons
>*Here, we can look at the contexts in which different particles appear most frequently, and see how these contexts compare to conventional wisdom/rules about how the particles are used*

### は & が
#### Common Heuristics
+ One common way to differentiate は and が is that は marks the *topic* of a sentence, where が marks the grammatical *subject* in a sentence. 
+ In this sense, は can lend more emphasis to the subject it marks as compared to が
+ が tends to be used more in noun and subordinate clauses, and if は is used in these contexts, it tends to be for emphasis
示例#16
0
    rows += [{
        'ticker': ETHC_ticker,
        'currency': currency.symbol,
        'nav': nav,
        'nav_per_share': nav_per_share,
        'share_price': price,
        'shares_outstanding': shares_outstanding,
        'premium': (price / nav_per_share - 1) * 100
    }]

df = pd.DataFrame(rows)

# Closing sentence: a negative mean premium is reported as the largest
# discount, anything else as the largest premium.
_premium = df["premium"]
if _premium.mean() < 0:
    _premium_summary = f'The maximum discount is {-_premium.min():.1f} %'
else:
    _premium_summary = f'The maximum premium is {_premium.max():+.1f} %'

r = dp.Report(
    '# Ether Capital Corp. NAV',
    dp.Text(f'Updated {now}'),
    '### Holdings',
    # The first holdings row is left out of the table.
    dp.Table(pd.DataFrame(current_holdings).iloc[1:]),
    '### Share Price Premium',
    dp.Table(df),
    _premium_summary,
)

r.save(path='report.html', name='Ether Capital Corp. NAV', open=False)

示例#17
0
    'https://covid.ourworldindata.org/data/vaccinations/vaccinations-by-manufacturer.csv',
    parse_dates=['date'])
# Total vaccinations per vaccine and date, keeping the last 1000 rows
_per_vaccine_date = df.groupby(['vaccine', 'date'])['total_vaccinations']
df = _per_vaccine_date.sum().tail(1000).reset_index()

# Plot vaccinations over time using Altair
_area = alt.Chart(df).mark_area(opacity=0.4, stroke='black')
plot = _area.encode(
    x='date:T',
    y=alt.Y('total_vaccinations:Q'),
    color=alt.Color('vaccine:N', scale=alt.Scale(scheme='set1')),
    tooltip='vaccine:N',
).interactive().properties(width='container')

# Tabulate total vaccinations by manufacturer on the most recent date
_latest_rows = df[df["date"] == df["date"].max()]
total_df = _latest_rows.sort_values(
    "total_vaccinations", ascending=False).reset_index(drop=True)
# In-cell bars are scaled against the sum over all manufacturers.
total_styled = total_df.style.bar(
    subset=["total_vaccinations"],
    color='#5fba7d',
    vmax=total_df["total_vaccinations"].sum(),
)

# Embed into a Datapane Report
report = dp.Report(
    "## Vaccination Report",
    dp.Plot(plot, caption="Vaccinations by manufacturer over time"),
    dp.Table(total_styled,
             caption="Current vaccination totals by manufacturer"),
    dp.Table(df, caption="Initial Dataset"),
)
report.upload(
    name='Covid Vaccinations Demo {{ name }}',
    description="Covid Vaccinations report, using data from ourworldindata",
    open=True,
)
示例#18
0
    "NTAR.CN", "IDK.CN", "ART.V", "PKK.CN", "APHA.TO", "CMC.CN", "AMPD.CN",
    "MTRX.V"
]

# Read the clock once so both dates are derived from the same "today"
# (two separate calls could straddle midnight).
_today = datetime.today()
curr_date = _today.strftime('%Y-%m-%d')
start_date = (_today - timedelta(days=59)).strftime('%Y-%m-%d')

df_assets = generate_up(stock_list, start_date, curr_date)

# One intraday plot per ticker
figure_list = [
    dp.Plot(intraday_plot(stock, start_date, curr_date))
    for stock in stock_list
]

# Publish only when a Datapane token is present in the environment.
# NOTE(review): the token is only checked, never used to log in here --
# confirm whether an explicit login is needed before publishing.
dp_token = os.getenv('DP_TOKEN')
publish_report = bool(dp_token)

# Build the report and always save a local HTML copy
r = dp.Report(f'### Intraday Report for {curr_date}', dp.Table(df_assets),
              dp.Blocks(*figure_list, columns=2))
r.save(path='index.html', open=True)

if publish_report:
    r.publish(name='Daily Report', open=False, tweet=False)
# Markdown text block
md_block = dp.Markdown(text="# Test markdown block \n Test **content**")

# File blocks: the same list as in-line JSON and as a downloadable file,
# plus an image loaded from disk.
list_asset = dp.File(data=lis, is_json=True)
file_asset = dp.File(data=lis)
img_asset = dp.File(file=Path("./datapane-logo.png"))

# Vega line plot
_line_chart = alt.Chart(gen_df()).mark_line().encode(x="x", y="y")
vega_asset = dp.Plot(data=_line_chart)

# Tables: one with pivoting disabled, one with pivoting enabled
df_asset = dp.Table(df, can_pivot=False)
pv_asset = dp.Table(gen_df(10), can_pivot=True)

# Matplotlib data: two curves over sorted random x values.
# The seed is fixed so the figure is reproducible.
np.random.seed(19680801)
xdata = np.random.random([2, 10])
# Each row is a view into xdata, so the in-place sorts below also sort xdata.
xdata1, xdata2 = xdata
xdata1.sort()
xdata2.sort()
ydata1 = xdata1**2
ydata2 = 1 - xdata2**3
mpl_fig = plt.figure(figsize=(15, 15))
ax = mpl_fig.add_subplot(1, 1, 1)
示例#20
0
'''
DATAPANE INTERACTIVE REPORT

pip install datapane

# https://datapane.com/reports/?name=&owned_by_me=on&order=
# Remember to login
# https://towardsdatascience.com/introduction-to-datapane-a-python-library-to-build-interactive-reports-4593fd3cb9c8
'''

import pandas as pd
import altair as alt
import datapane as dp

# Monthly GOOG price history, downloaded as CSV
_history_url = (
    'https://query1.finance.yahoo.com/v7/finance/download/GOOG?period2=1585222905&interval=1mo&events=history'
)
df = pd.read_csv(_history_url)

# Interactive line chart of the opening price over time
chart = alt.Chart(df).encode(x='Date:T', y='Open').mark_line().interactive()

# Once you have the df and the chart, wrap each one in a block
_description = dp.Markdown('My simple report')  # report description
_table = dp.Table(df)  # data table
_plot = dp.Plot(chart)  # chart
r = dp.Report(_description, _table, _plot)

# Publish your report. Make sure to have visibility='PUBLIC' if you want to share your report
r.publish(name='stock_report', visibility='PUBLIC')
pip install datapane


# In[2]:


import pandas as pd
import altair as alt
import datapane as dp


# In[3]:


# Monthly GOOG price history, downloaded as CSV
_history_url = 'https://query1.finance.yahoo.com/v7/finance/download/GOOG?period2=1585222905&interval=1mo&events=history'
df = pd.read_csv(_history_url)

# Interactive line chart of the opening price over time
chart = alt.Chart(df).encode(x='Date:T', y='Open').mark_line().interactive()

# Save the table and chart as a local HTML report and open it
_blocks = (dp.Table(df), dp.Plot(chart))
r = dp.Report(*_blocks)
r.save(path='report.html', open=True)


# In[ ]:




示例#22
0
    "Media - Diversified",
    "Industrial Distribution",
    "Agriculture",
    "Beverages - Non-Alcoholic",
    "Medical Devices & Instruments",
    "Diversified Financial Services",
    "Furnishings, Fixtures & Appliances",
    "Steel",
    "Packaging & Containers",
    "Semiconductors",
    "Waste Management",
    "Healthcare Plans",
]


def industry_to_md(industry_list):
    """Render the items as a Markdown bullet list, one ``* item`` per line.

    Returns an empty string when ``industry_list`` is empty.
    """
    bullets = (f'* {industry}' for industry in industry_list)
    return '\n'.join(bullets)


# Case-insensitive match on any of the wanted industries.
# A global inline flag is only valid at the very start of a pattern: repeating
# "(?i)" before each alternative raises re.error on Python 3.11+ (and was a
# DeprecationWarning before that). One leading flag covers every alternative.
pattern = '(?i)Waste Management|Interactive Media|Telecommunication Services|Software|Hardware'

# Rows with a missing industry are treated as non-matching (na=False).
desired_df = stock_df[stock_df['industry'].str.contains(pattern, na=False)]

industry_list = industry_to_md(industries)
curr_date = datetime.today().strftime('%Y-%m-%d')
r = dp.Report(f'### Desired Stocks for {curr_date}', dp.Table(desired_df),
              '### Industry List', industry_list)
# Keep a local HTML copy, then publish.
r.save(path='industry_index.html')

r.publish(name='Desired Tickers', open=False, tweet=False)