Пример #1
0
 def showStatistics(self, aggregated=True, saveHTML=False):
     """Display the trial statistics as an HTML table and optionally save a pivot page.

     Args:
         aggregated: When true, show ``self.agg_df`` (calling
             ``self.aggregateTrials()`` first if it is empty); otherwise
             show ``self.df``.
         saveHTML: When true, also write a pivottablejs page named
             ``"<aggregated|raw> statistics.html"`` under
             ``self.experiment_name``.
     """
     table = self.df.copy()
     label = "raw statistics"
     if aggregated:
         if self.agg_df.empty:
             # presumably fills self.agg_df - confirm against aggregateTrials()
             self.aggregateTrials()
         table = self.agg_df.copy()
         label = "aggregated statistics"

     display(HTML(table.to_html()))
     if not saveHTML:
         return
     target = os.path.join(self.experiment_name, label + '.html')
     pivot_ui(table, outfile_path=target)
Пример #2
0
    def showStatistics(self, aggregated=True, saveHTML=False):
        """Render the statistics table inline and optionally export a pivot page.

        Args:
            aggregated: Show ``self.agg_df`` when true (it is built through
                ``self.aggregateTrials()`` if still empty), else ``self.df``.
            saveHTML: Also write ``"<name>.html"`` with pivottablejs into the
                ``self.experiment_name`` directory.
        """
        frame = self.df.copy()
        if aggregated:
            # The aggregate is only computed the first time it is needed.
            if self.agg_df.empty:
                self.aggregateTrials()
            frame = self.agg_df.copy()

        title = "aggregated statistics" if aggregated else "raw statistics"
        display(HTML(frame.to_html()))
        if saveHTML:
            html_path = os.path.join(self.experiment_name, title + '.html')
            pivot_ui(frame, outfile_path=html_path)
Пример #3
0
    def showDF(self, df, how='print'):
        """Show ``df`` in the requested form, then print its shape.

        Args:
            df: Data frame to show.
            how: ``'print'`` prints ``df.to_string()``; ``'pivot'`` writes a
                pivottablejs page to ``x.html`` and opens ``SPATH + 'x.html'``
                in a new browser tab; ``'tabulate'`` prints a bordered table.
                Any other value prints an error message.
        """
        if how == 'pivot':
            # render pivot table in the browser
            pivot_ui(df, outfile_path='x.html')
            webbrowser.open(SPATH + 'x.html', new=2)
        elif how == 'tabulate':
            # bordered text table
            bordered = tabulate(df, headers='keys', tablefmt='psql')
            print(bordered)
        elif how == 'print':
            # plain pretty-printed string
            print(df.to_string())
        else:
            print('invalid "how" in showDF()')

        print(df.shape)
Пример #4
0
def _pivot_ui(df, totals=True, row_totals=True, **options):
    """Interactive pivot table for data analysis.

    Args:
        df: Data frame handed to ``pivottablejs.pivot_ui``.
        totals: When false, a CSS rule hiding ``.pvtTotal``,
            ``.pvtTotalLabel`` and ``.pvtGrandTotal`` is injected into the
            generated page.
        row_totals: When false, a CSS rule hiding ``.rowTotal``,
            ``.pvtRowTotalLabel`` and ``.pvtGrandTotal`` is injected.
        **options: Forwarded unchanged to ``pivottablejs.pivot_ui``, e.g.::

                rows=['x', 'y'],
                cols=['z', 'v'],
                vals=['percentile/99th'],
                aggregatorName='First',
                rendererName='Heatmap'

    Returns:
        The object returned by ``pivottablejs.pivot_ui``, or ``None`` when
        pivottablejs cannot be imported.
    """
    try:
        from pivottablejs import pivot_ui
    except ImportError:
        log.warning("Error: cannot import pivottablejs, Pivottable will not be generated'!")
        return None
    iframe = pivot_ui(df, **options)

    hidden_rules = []
    if not totals:
        hidden_rules.append('.pvtTotal, .pvtTotalLabel, .pvtGrandTotal {display: none}')
    if not row_totals:
        hidden_rules.append('.rowTotal, .pvtRowTotalLabel, .pvtGrandTotal {display: none}')

    if hidden_rules:
        # pivottablejs writes the page as UTF-8, so it must be read and
        # rewritten with the same encoding instead of the locale default.
        # NOTE(review): iframe.src is assumed to be a local file path; that
        # does not hold if a ``url`` option was passed - confirm with callers.
        with open(iframe.src, encoding="utf-8") as f:
            page = f.read()
        page = page.replace('</style>', ''.join(hidden_rules) + '</style>')
        with open(iframe.src, "w", encoding="utf-8") as f:
            f.write(page)
    return iframe
Пример #5
0
def pivot_ui(df, **kwargs):
    """Call ``pivottablejs.pivot_ui`` on a copy of ``df`` whose CSV uses LF line endings.

    ``df`` is wrapped in a ``DataFrame`` subclass whose ``to_csv`` replaces
    every ``"\\r\\n"`` in the produced text with ``"\\n"``. All keyword
    arguments are forwarded to ``pivottablejs.pivot_ui`` and its result is
    returned.
    """
    import pivottablejs

    class _LfCsvFrame(pd.DataFrame):
        """DataFrame whose CSV text never contains CRLF line endings."""

        def to_csv(self, **kwargs):
            csv_text = super().to_csv(**kwargs)
            return csv_text.replace("\r\n", "\n")

    wrapped = _LfCsvFrame(df)
    return pivottablejs.pivot_ui(wrapped, **kwargs)
Пример #6
0
 def pivot_ui(self):
     """Open ``self.df`` in an interactive pivottablejs table.

     Returns:
         Whatever ``pivottablejs.pivot_ui`` returns, or ``None`` (after
         printing an installation hint) when pivottablejs is not installed.
     """
     try:
         from pivottablejs import pivot_ui as render_pivot
     except ImportError:
         message = "Error: cannot import pivottablejs, please install 'pip install pivottablejs'!"
         print(message)
         return
     return render_pivot(self.df)
Пример #7
0
def custom_filter_plots(input_csv_file, output_directory, custom_pivot_table):
    """Analyse the parsed simulation results, interactively or with custom plots.

    Users are expected to edit the custom branch below to suit their analysis.

    Args:
        input_csv_file: CSV file with all parsed results of the simulation
            campaign; read with ``pd.read_csv``.
        output_directory: Directory passed to ``new_folder`` and to every
            custom analysis function; its content is listed at the end.
        custom_pivot_table: When true, generate the pivottablejs page and open
            ``pivottablejs.html`` in Firefox instead of running the custom
            analysis functions.
    """
    new_folder(output_directory)
    df = pd.read_csv(input_csv_file)

    if custom_pivot_table:
        pj.pivot_ui(df)
        os.system('firefox pivottablejs.html')
        return

    # Rename columns for axis plotting # TO DO
    df = df.rename(columns={evaluate: evaluate})

    # swap rows
    #df = df.rename(columns={'accidentDuration': 'Beacon_interval(s)', 'beaconInterval': 'Accident_duration'})

    # Examples of custom analyze functions
    #node_speed(df, output_directory)
    #packet_losses_new(df, output_directory)
    rx_time_distribution(df, output_directory)
    distance(df, output_directory)
    #node_counter(df, output_directory)

    # Print outputs
    print('\nFiles generated: ')
    for position, entry in enumerate(os.listdir(output_directory)):
        print(' {}) {}'.format(position, entry))
Пример #8
0
def display_dataframe_with_pivotablejs(obj: Any):
    """
    Write a table to a temporary pivottablejs HTML page and open it in a browser.
        :param obj: table object to display; every column is used as a pivot row
    """
    # Only the unique file name is needed. The handle is closed immediately so
    # it does not leak and so pivot_ui can reopen the path for writing (an
    # open NamedTemporaryFile cannot be reopened by name on every platform).
    with tempfile.NamedTemporaryFile(prefix="sho_",
                                     suffix=".html",
                                     delete=False) as tf:
        file_path = tf.name
    cols = list(obj.columns.values)
    print(f"File Name : {file_path}")
    pivot_ui(obj, outfile_path=file_path, rows=cols)
    file_url = 'file://' + os.path.realpath(file_path)
    try:
        browser = webbrowser.get()
        browser.open(file_url)
    except Exception:
        # Best effort: fall back to the module-level opener.
        logger.info("Couldn't find chrome !")
        webbrowser.open(file_url)
    # NOTE(review): presumably gives the browser time to load the page before
    # the caller continues - confirm whether the pause is still needed.
    time.sleep(5)
    logger.info(f"File Name : {file_path}")
Пример #9
0
def df_meta_ui(df):
    """Build a two-pane widget: frame metadata on the left, pivot UI on the right.

    The left output receives ``df.info()``, ``df.describe()`` and
    ``df.head()``; the right output (full width) receives ``pivot_ui(df)``.

    Returns:
        The ``ipyw.HBox`` holding both output widgets.
    """
    container = ipyw.HBox()
    meta_pane = ipyw.Output()
    pivot_pane = ipyw.Output(layout=ipyw.Layout(width="100%"))
    summary = df.describe()
    container.children = [meta_pane, pivot_pane]

    with meta_pane:
        df.info()
        display(summary)
        display(df.head())

    with pivot_pane:
        display(pivot_ui(df))

    return container
Пример #10
0
def _pivot_ui(df, totals=True, **options):
    """Interactive pivot table for data analysis.

    Args:
        df: Data frame handed to ``pivottablejs.pivot_ui``.
        totals: When false, a CSS rule hiding ``.pvtTotal``,
            ``.pvtTotalLabel`` and ``.pvtGrandTotal`` is injected into the
            generated page.
        **options: Forwarded unchanged to ``pivottablejs.pivot_ui``.

    Returns:
        The object returned by ``pivottablejs.pivot_ui``, or ``None`` when
        pivottablejs cannot be imported.
    """
    try:
        from pivottablejs import pivot_ui
    except ImportError:
        print("Error: cannot import pivottablejs, please install 'pip install pivottablejs'!")
        return None
    iframe = pivot_ui(df, **options)
    if not totals:
        # pivottablejs writes the page as UTF-8, so read and rewrite it with
        # the same encoding rather than the locale default.
        # NOTE(review): iframe.src is assumed to be a local file path; that
        # does not hold if a ``url`` option was passed - confirm with callers.
        with open(iframe.src, encoding="utf-8") as f:
            replacedHtml = f.read().replace(
                '</style>',
                '.pvtTotal, .pvtTotalLabel, .pvtGrandTotal {display: none}</style>'
            )
        with open(iframe.src, "w", encoding="utf-8") as f:
            f.write(replacedHtml)
    return iframe
Пример #11
0
# in-Notebook GUI for pivoting the data (good for exploring categorical data)
from pivottablejs import pivot_ui

# `data` is not defined in this snippet; it must already exist in the session.
pivot_ui(data)
Пример #12
0
Note: this becomes too slow to be usable when the table has too many rows or columns (with about 300 columns nothing is rendered at all).

Usage:
    $ conda activate tfgpu
    $ python csv2pivot_html.py -o ./ -i ./train.csv
"""
import os
import pathlib
import argparse
import pandas as pd
from pivottablejs import pivot_ui
from IPython.display import HTML

if __name__ == "__main__":
    # Command line: -o <output dir> (default "./"), -i <input csv> (required).
    ap = argparse.ArgumentParser()
    ap.add_argument(
        "-o", "--output_dir", type=str, default="./", help="output dir path.",
    )
    # The CSV path is mandatory: let argparse report a missing value instead
    # of failing later inside pandas.
    ap.add_argument(
        "-i", "--input_csv", type=str, required=True, help="input csv path.",
    )
    args = vars(ap.parse_args())

    os.makedirs(args["output_dir"], exist_ok=True)
    df = pd.read_csv(args["input_csv"])
    # Output page is "<output_dir>/<input file stem>.html".
    html_name = f'{pathlib.Path(args["input_csv"]).stem}.html'
    outfile_path = str(pathlib.Path(args["output_dir"]) / html_name)

    # Write the pivot table HTML page.
    pivot_ui(df, outfile_path=outfile_path)
    # NOTE(review): the HTML object is never passed to display(), so this
    # line appears to render nothing when run as a script - confirm and drop.
    HTML(outfile_path)
Пример #13
0
    ts[variable] = ts[variable].notnull() * 1
    map = folium.Map(location=[48, -102], zoom_start=2)
    map.choropleth(geo_data=geo,
                   data=ts,
                   columns=['country', variable],
                   key_on='feature.properties.name',
                   reset=True,
                   fill_color='GnBu',
                   fill_opacity=1,
                   line_opacity=0.2,
                   legend_name=legend_name if legend_name else variable)
    return map


# Map for the 2013-2017 period of the 'number_undernourished' variable,
# using the given legend text.
plot_null_map(data, '2013-2017', 'number_undernourished',
              'Number undernourished is missing')

#Over time
# Heatmap of record counts: one row per variable, one column per time period.
fig, ax = plt.subplots(figsize=(16, 16))
sns.heatmap(data.groupby(['time_period',
                          'variable']).value.count().unstack().T,
            ax=ax)
plt.xticks(rotation=45)
plt.xlabel('Time period')
plt.ylabel('Variable')
plt.title('Number of countries with data reported for each variable over time')
plt.show()

# Interactive exploration of the 2013-2017 slice. Both results are left as
# bare expressions, so they are only shown when run as notebook cells.
pivottablejs.pivot_ui(time_slice(data, '2013-2017'), )
pandas_profiling.ProfileReport(time_slice(data, '2013-2017'))
Пример #14
0

# In[5]:

# Load the data set explored in the following cells.
df = pd.read_csv("../data/mps.csv")


# In[6]:

# Preview the first rows.
df.head()


# In[7]:

# Open the interactive pivot table for the loaded frame.
from pivottablejs import pivot_ui
pivot_ui(df)
# Province, Party, Average, Age, Heatmap


# # Keyboard shortcuts

# In[8]:

# in select mode, shift j/k (to select multiple cells at once)
# split cell with ctrl shift -


# In[9]:

first = 1
Пример #15
0
        financials[j]['Current Ratio'] = Ratios[j + p]['currentRatio']
        financials[j]['Cash Conversion Cycle'] = Ratios[
            j + p]['cashConversionCycle']

        # Price Ratios
        financials[j]['Mkt Cap'] = key_Metrics[j + p]['marketCap'] / millions
        financials[j]['PE'] = Ratios[j + p]['priceEarningsRatio']
        financials[j]['PS'] = Ratios[j + p]['priceToSalesRatio']
        financials[j]['PB'] = Ratios[j + p]['priceToBookRatio']
        financials[j]['Price To FCF'] = Ratios[j +
                                               p]['priceToFreeCashFlowsRatio']
        financials[j]['PEG'] = Ratios[j + p]['priceEarningsToGrowthRatio']
        financials[j]['Revenue per Share'] = key_Metrics[j +
                                                         p]['revenuePerShare']
        financials[j]['EPS'] = IS[j + p]['eps']

    # Transform the dictionary into a Pandas
    fundamentals_single = pd.DataFrame.from_dict(financials, orient='index')

    # Add a new column that indicates the stock
    stock_identity = [company[i]] * len(fundamentals_single.index)
    fundamentals_single.insert(0, "Stock", stock_identity, True)

    # Concatenate the 2 dataframes together
    fundamentals_total = pd.concat([fundamentals_total, fundamentals_single])

# Export to Excel or directly to pivot table
pivot_ui(fundamentals_total)
# NOTE(review): mode='a' appends to an existing workbook; presumably
# 'fundamentals.xlsx' must already exist and must not yet contain a
# 'consolidated_quarter' sheet - confirm against the pandas version in use.
with pd.ExcelWriter('fundamentals.xlsx', mode='a') as writer:
    fundamentals_total.to_excel(writer, sheet_name='consolidated_quarter')
Пример #16
0
c=3

# %%
b=5

# %%
e=8

# %%
df

# %%
from pivottablejs import pivot_ui



# %%

# Write the pivot page next to the notebook (plain ASCII quotes are required;
# typographic quotes are a syntax error).
pivot_ui(df,outfile_path='pivottablejs.html')
# NOTE(review): HTML() receives the file name as markup here; presumably
# HTML(filename='pivottablejs.html') was intended - confirm.
HTML('pivottablejs.html')

# %%
import qgrid

# %%
qgrid.show_grid(df)

# %%

# %%
def view_pivotTable(plot_data: plot_data):
    """Display the pivot UI for the module-level variable named by ``plot_data``.

    ``plot_data`` is used as a key into ``globals()``, so it must be the name
    of an existing global.
    """
    selected = globals()[plot_data]
    display(pivot_ui(selected))
Пример #18
0
    for i in json_device_list['devices']:
        device_dict['sysName'] = i['sysName']
        device_dict['ip'] = i['ip']
        device_dict['sysDescr'] = i['sysDescr']
        device_dict['os'] = i['os']
        device_dict['version'] = i['version']
        device_dict['location'] = i['location']
        device_dict['hardware'] = i['hardware']
        device_dict['hostname'] = i['hostname']
        device_dict['features'] = i['features']
        device_dict['serial'] = i['serial']
        device_dict['nms'] = "https://" + NMS_DEVICE_URL + str(i['device_id'])
        device_dict['rancid'] = "http://" + RANCID + i['hostname']
        devices_list.append(dict(device_dict))

# Dump the collected devices to CSV. newline='' is what the csv module
# requires for files handed to a writer; UTF-8 matches the read_csv default.
with open(CSV_FILE, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile,delimiter=',',fieldnames=devices_list[0].keys())
    writer.writeheader()
    writer.writerows(devices_list)

# Build the pivot page with every inventory column preselected as a row.
df = pandas.read_csv(CSV_FILE)
pivot_ui(df,outfile_path=HTML_FILE,rows=['sysName','hostname','ip','hardware','os','version','features','serial','location','nms','rancid'])

# Stamp the generated page with the edit time. pivottablejs writes the page
# as UTF-8, so it is read and rewritten with that encoding.
with open(HTML_FILE, encoding='utf-8') as myfile:
    txt = myfile.read()
soup = bs4.BeautifulSoup(txt, features="html.parser")

soup.head.append("Last Edit: " + DATE.strftime("%Y-%m-%d %H:%M"))
with open(HTML_FILE, "w", encoding='utf-8') as outfile:
    outfile.write(str(soup))
Пример #19
0
def olap_configuration():
    """Build and open the pivot page for the dimensions and measures chosen in the GUI.

    Reads the time, node and spatial dimension selectors and the measures
    listbox, builds the data frame with ``joined_data`` and keeps only the
    chosen dimension and measure columns. If at least one dimension is
    chosen, the pivot page is written to ``cube.html`` and opened in the
    browser; otherwise an error label is packed into ``button_frame``.
    Finally the geospatial filter state (``liste``, ``i``, ``text`` and the
    ``label`` widget) is reset.
    """
    global liste
    global i
    global label
    global text
    # fetch the chosen dimensions
    my_dimension_temps = choose_dimension_temps.get()
    my_dimension_node = choose_dimension_node.get()
    my_dimension_spatiale = choose_dimension_spatiale.get()
    # Collect the measures into one string of the form "measure1,measure2..."
    # NOTE(review): entries are read by position 0..n-1, where n is the number
    # of selected items, not by the selected indices themselves - confirm.
    my_measure = str(measures_listbox.get(0))
    for item in range(1, len(measures_listbox.curselection())):
        my_measure = str(measures_listbox.get(item)) + "," + my_measure

    # build the data frame used by the pivot table
    df = joined_data(my_dimension_temps, my_measure)

    measures = my_measure.split(',')
    # Dimensions actually chosen by the user (any of them may be left empty),
    # in the fixed order time, node, spatial.
    chosen_dimensions = [
        dimension
        for dimension in (my_dimension_temps, my_dimension_node,
                          my_dimension_spatiale)
        if dimension
    ]

    # generate the pivot-table page if the user chose at least one dimension
    if chosen_dimensions:
        if len(chosen_dimensions) == 3:
            final_data = df[flat(measures, [
                my_dimension_temps, my_dimension_node,
                my_dimension_spatiale
            ])]
        else:
            # A flat column list is required here: nesting the measures list
            # inside the selection list is not a valid column selection.
            final_data = df[chosen_dimensions + measures]

        pivot_ui(final_data,
                 rows=chosen_dimensions,
                 exclusions={final_data.columns[0]: ["null"]},
                 outfile_path='cube.html')
        # NOTE(review): the HTML object is never displayed, so this line
        # appears to have no effect - confirm and drop.
        HTML('cube.html')
        # open the page in the browser
        webbrowser.open("cube.html")
    # if the user chose no dimension at all, show an error message
    else:
        error = Label(button_frame,
                      text="Il faut choisir des dimensions",
                      font=("Courrier", 14),
                      bg='#4065A4',
                      fg='red')
        error.pack()
    # reset the geospatial filter input (list of points and the text that
    # displays the entered points)
    liste = []
    i = 0
    text = ""
    label.config(text=text)
Пример #20
0
import pandas as pd

# Load the combined data and force the connection_hrs column to float.
df = pd.read_csv("./tmp/tmp_all_data.csv").astype({"connection_hrs": float})
df.head()
print(df)

from pivottablejs import pivot_ui

# Write the pivot page into the reports directory.
pivot_ui(df, outfile_path='./output/reports/!ipivottablejs.html')
# Stretch the signature text's background box to the right x-limit of the axes.
bb = signature.get_bbox_patch()
bb.set_boxstyle("ext", pad=0.6, width=ax.get_xlim()[1])

# Footer labels: a centred caption and a right-aligned one, both placed
# `offset` below the lower y-limit.
middle = (ax.get_xlim()[0] + ax.get_xlim()[1]) / 2
ax.text(x = middle, y = ax.get_ylim()[0]-offset,
                    s = "DEATHS PER DAY", fontsize = 14, color = "#f0f0f0",
                    ha="center", backgroundcolor = "steelblue")
ax.text(x = ax.get_xlim()[1], y = ax.get_ylim()[0]-offset,
                    s = str2, fontsize = 14, color = "#f0f0f0",
                    ha="right", backgroundcolor = "steelblue")

# Save as png (file name prefixed with today's date) and show plot
fname = str(datetime.date.today()) + "_sweden_death_compare" + ".png"
plt.savefig(fname, format="png", dpi="figure", bbox_inches="tight", facecolor=fig.get_facecolor())
# NOTE(review): despine runs after savefig, so the saved file presumably still
# has all spines - confirm whether that is intended.
sns.despine()
plt.show()


import pandas_profiling
from pivottablejs import pivot_ui
from pydqc import distribution_compare_pretty


# Write the profiling report for scb_deaths to report.html.
pandas_profiling.ProfileReport(scb_deaths).to_file("report.html")


# Interactive pivot table over the same frame.
pivot_ui(scb_deaths)




# In[12]:


# Distinct DAY and MONTH values in first-seen order.
X=list(dict.fromkeys(TOU['DAY']))
Y=list(dict.fromkeys(TOU['MONTH']))


# In[13]:


# Rows carrying this NAME hold the total import register; all other rows are
# counted as time-of-use (TOU) registers.
TOTAL_NAME = 'Active power (P) Import Total (kW)-kWh'

# Per-day text report. The context manager closes the file even if a day
# fails to compute, and each sum is computed once and reused.
with open("TOU_Analysis_Output.TXT","w+") as f:
    f.write('TOU Analysis Results: ({}/{} - {}/{})'.format(X[0],Y[0],X[-1],Y[-1]))
    f.write('\n'*2)
    for i in X:
        # Positive quantities recorded on day i.
        positive = TOU[(TOU.DAY ==i) & (TOU.QTY >0)]
        tou_kwh = sum(positive[positive.NAME != TOTAL_NAME]['QTY'])
        total_kwh = sum(positive[positive.NAME == TOTAL_NAME]['QTY'])
        f.write('Day '+str(i)+'\n')
        f.write('1)TOU consumption' + ' = ' + str(round(tou_kwh,2))+' kWh' + '\n')
        f.write('2)Total consumption' + ' = '+ str(round(total_kwh,2))+ ' kWh'+'\n')
        f.write('3)Difference = ' + str(round(total_kwh-tou_kwh,2))+' kWh' + '\n')
        f.write('\n')


# In[14]:


# Write the pivot page; width=0 and height=0 are passed for the returned frame.
pivot_ui(TOU,outfile_path='TOU_Analysis.html',width=0,height=0)

Пример #23
0
def load_data(file):
    """Load the object stored in ``file`` with joblib and open it in the pivot UI.

    NOTE(review): joblib.load is pickle-based, so ``file`` should only come
    from a trusted source.

    Returns:
        The result of ``pivot_ui`` for the loaded object.
    """
    loaded = joblib.load(file)
    return pivot_ui(loaded)
Пример #24
0
import pandas as pd
from pivottablejs import pivot_ui

# Load the CSV and open it in the interactive pivot table.
df = pd.read_csv("data/placholder.csv")
pivot_ui(df)
Пример #25
0
    selectGraph = data.query('(YEAR == @selectYr) & (MONTH == @selectMth)')
    selectGraph = selectGraph.drop([
        'Unnamed: 0', 'WEEKNUMBER', 'CREATED_AT', 'LEVEL', 'AVER HOURLY SALARY'
    ],
                                   axis=1)
    selectGraph

# Appendices: pivot table and forecasting data
st.subheader('Appendices')

# Hide/Show pivot table for raw data
st.sidebar.subheader("Appendices")
if st.sidebar.checkbox("Show pivot table for the raw data", True):
    st.markdown("#### Pivot table for the raw data")
    dataPv = data.drop(['AVER HOURLY SALARY'], axis=1)
    pvTable = pivot_ui(dataPv)
    # pivottablejs writes the page as UTF-8; read it back with the same
    # encoding instead of the locale default before embedding it.
    with open(pvTable.src, encoding="utf-8") as t:
        components.html(t.read(), height=400, scrolling=True)

# Below is the machine learning time series forecasting for time entries
dataPrd = data
# Sum HOUR per CREATED_AT value; margins=True appends a totals row.
dataPrd = pd.pivot_table(dataPrd,
                         index=['CREATED_AT'],
                         values='HOUR',
                         aggfunc='sum',
                         margins=True)
# Drop the last row (the totals row added by margins=True).
dataPrd = dataPrd[:-1]
# Turn the index back into a regular CREATED_AT column.
dataPrd = pd.DataFrame(dataPrd.to_records())

# CREATED_AT values converted to timestamps.
dates = list(dataPrd['CREATED_AT'])
dates = list(pd.to_datetime(dates))
    'emp_length',
    'fico_range_low(log)',
    'term',
    'int_rate',
    'funded_amnt(log)',
    'grade',
    'sub_grade',
    'annual_inc(log)']

# categorical variables
cate_list = ['addr_state',
             'purpose',
             'home_ownership']

# make pivot table
# Column names containing "(log)" have their last five characters stripped, so
# the pivot uses the columns without that suffix.
pivot_ui(df[['loan_status'] + [x if '(log)' not in x else x[:-5] for x in num_list + cate_list]],
         outfile_path="pivot_table/2017Q2LendingClub.html")

# make hist
plt.figure()
df[num_list + ['loan_status']
   ].hist(bins=50, figsize=(15, 15), edgecolor='white')
plt.savefig('figures/dist.png')
plt.gcf().clear()

# Maps of the mean per variable: log=1 for the amount columns, log=0 otherwise.
for var_name in ['funded_amnt', 'annual_inc']:
    get_map(var_name, log=1, method='mean')

for var_name in ['loan_status', 'grade', 'emp_length']:
    get_map(var_name, log=0, method='mean')

# handling imbalanced data
Пример #27
0
                                                   regex=True)
# Flag rows whose PFAMName contains "Bndl" (case-insensitive).
DimDf['Bundle_IND'] = DimDf['PFAMName'].str.contains('Bndl',
                                                     flags=re.IGNORECASE,
                                                     regex=True)

# In[ ]:

DimDf.head(20)

# In[ ]:

print(DimDf.dtypes)

# In[ ]:

# Interactive pivot table over the dimension frame.
pivot_ui(DimDf)

# In[ ]:

# Missing-value matrix for Df, rendered inline.
get_ipython().run_line_magic('matplotlib', 'inline')
msno.matrix(Df)

# In[288]:

sns.pairplot(Df)

# In[ ]:

# Same pair plot, coloured by the "Day" column.
sns.pairplot(Df, hue="Day")

# In[ ]:
# One entry per symbol of stock_list that appears in the pricing data.
metrics = {}

for quote in pricing:

    for symbol in stock_list:
        if symbol != quote['symbol']:
            continue

        entry = {
            "Stock": quote["symbol"],
            "Price": quote["price"],
            "50-day Moving Average": quote["priceAvg50"],
            "200-day Moving Average": quote["priceAvg200"],
            "PE Ratio": quote["pe"],
        }
        metrics[symbol] = entry

        # Without a price the deltas cannot be computed; move to the next quote.
        if quote["price"] is None:
            break

        entry["Delta from 50-Avg"] = quote["price"] - quote["priceAvg50"]
        entry["Delta from 200-Avg"] = quote["price"] - quote["priceAvg200"]

# Transform the dictionary into a Pandas
metrics_df = pd.DataFrame.from_dict(metrics, orient='index')

# Export to Excel or directly to pivot table
pivot_ui(metrics_df)
with pd.ExcelWriter('stock select.xlsx', mode='a') as writer:
    metrics_df.to_excel(writer, sheet_name='trial')
# Show information when hovering the mouse over datapoints
this_plot.add_tools(tools.HoverTool(tooltips=[("Day",
                                               "@day")]))  # @ chooses feature

# Hide all circles of a given category when clicked in legend
this_plot.legend.click_policy = "hide"

# Render the finished plot inline in the notebook.
output_notebook()
show(this_plot)

# %% [markdown] {"slideshow": {"slide_type": "slide"}}
# # Pivot table plots

# %%
# Interactive pivot table over the `tips` frame.
from pivottablejs import pivot_ui
pivot_ui(tips)

# %% [markdown] {"slideshow": {"slide_type": "slide"}}
# # Dash/Plotly
# [Dash/Plotly](https://plot.ly/products/dash/) is another package for producing really nice and interactive graphs, but it requires signing up for an account to initialize it. After initialization you can use it online by default (which means all of your graphs get saved to the cloud for everyone to see forever) or you can use it offline (as demoed below). Examples taken or modified from [here](https://plot.ly/python/ipython-notebook-tutorial/).
#
# I'm not familiar with the new Dash API that's been recently introduced, nor have I really explored using Plotly. I've been able to get everything that I need done in Matplotlib/Seaborn, so understand that the code snippets below may no longer work with recent versions of the Plotly package (which seems like a different thing to Dash).

# %% [markdown] {"slideshow": {"slide_type": "skip"}}
# ## Troubleshooting setup
# When I first tried using plotly I sometimes got `IOPub data rate exceeded` errors. Here's how you fix that:
#
# - run `jupyter notebook --generate-config` to generate a clean configuration file with all parameters commented out
# - modify `c.NotebookApp.iopub_data_rate_limit` and `c.NotebookApp.iopub_msg_rate_limit` to be some absurdly large numbers

# %% [markdown] {"slideshow": {"slide_type": "subslide"}}
Пример #30
0
#numbers4.rolling('8h', min_periods=1).sum()

# Per week and tester: sum the hour columns, average hrs_lead, round the
# result and turn the group keys back into columns.
grouped = numbers4.groupby(['WeekNo', 'TestTestedBy']).agg({
    'Total_Hrs': 'sum',
    'CEP_Hrs': 'sum',
    'UTM_Hrs': 'sum',
    'AdjustmentTime': 'sum',
    'hrs_lead': 'mean'
}).round().reset_index()
#grouped = numbers4.groupby(['WeekNo','TestTestedBy'])['Total_Hrs','CEP_Hrs','UTM_Hrs','AdjustmentTime'].sum().reset_index()

# In[ ]:

# Interactive pivot table over the un-aggregated frame.
from pivottablejs import pivot_ui
pivot_ui(numbers4)

# # The results for number of hours completed

# In[3]:

# Rank by week, then by total hours, both descending, and number rows from 1.
grouped = grouped.sort_values(['WeekNo', 'Total_Hrs'],
                              ascending=[False, False]).reset_index()
grouped.index = grouped.index + 1
grouped.drop(['index'], axis=1, inplace=True)

# Remove the rows of the testers that are excluded from the results.
for excluded_tester in (0, 'barneje', 'loncmm1', 'lonutm1', 'lonutm2'):
    grouped = grouped.drop(
        grouped.index[grouped.TestTestedBy == excluded_tester])
Пример #31
0
    def validate(ss, config):
        """Apply validation process using config input file.

        Reads the source CSV and the comparable CSVs named in ``config``,
        runs ``DataframeValidator.process()`` and ``.compare()``, appends the
        correctness, completeness and comparison results as JSON to the
        configured output tables, then reads each table back into pandas and
        writes one pivottablejs page per table (``"<table>.html"``).

        Args:
            ss: Spark session used for all reads.
            config: Object providing the input paths, rule dictionaries and
                output table locations referenced below.
        """

        def read_csv_with_header(path):
            return ss.read.format("csv").option("header", "true").load(path)

        source_read_df = read_csv_with_header(config.source_df)
        comparable_dfs_list = [
            (t, read_csv_with_header(t)) for t in config.comparable_dfs_list
        ]

        validator = DataframeValidator(
            spark=ss,
            source_df=source_read_df,
            id_col_name=config.id_col_name,
            correctness_rules_dict=config.correctness_rules_dict,
            parent_children_validation_pairs=config.
            parent_children_validation_pairs,
            completeness_rules_dic=config.completeness_rules_dic,
            comparable_dfs_list=comparable_dfs_list,
            unique_column_group_values_per_table=config.
            unique_column_group_values_per_table,
        )

        processed_df = validator.process()
        overall_count_col = Constants.OVER_ALL_COUNT_COL
        overall_error_col = Constants.IS_ERROR_COL + Constants.OVER_ALL_COUNT_COL

        # Completeness is a single row holding the overall columns; the
        # correctness frame is everything else.
        completeness_df = processed_df.limit(1).select(
            overall_count_col,
            overall_error_col,
            Constants.DATE_TIME_REPORT_COL,
        )
        correctness_df = processed_df.drop(overall_count_col,
                                           overall_error_col)
        comparison_df = validator.compare()

        # Append this run to each output table.
        correctness_df.coalesce(1).write.mode("append").json(
            config.output_correctness_table)
        completeness_df.coalesce(1).write.mode("append").json(
            config.output_completeness_table)
        comparison_df.coalesce(1).write.mode("append").json(
            config.output_comparison_table)

        # Read the accumulated tables back as pandas frames for the reports.
        pd_correctness_df = ss.read.json(
            config.output_correctness_table).toPandas()
        pd_completeness_df = ss.read.json(
            config.output_completeness_table).toPandas()
        comparison_df = ss.read.json(config.output_comparison_table).toPandas()

        # Error-flag columns, excluding the sum-report and row-error ones.
        error_flag_columns = [
            column for column in pd_correctness_df.columns
            if Constants.IS_ERROR_COL in column
            and Constants.SUM_REPORT_SUFFIX not in column
            and Constants.ROW_ERROR_SUFFIX not in column
        ]
        # Every comparison column except the report timestamp.
        comparison_rows = [
            column for column in comparison_df.columns
            if Constants.DATE_TIME_REPORT_COL not in column
        ]

        pivot_ui(
            pd_correctness_df,
            outfile_path="{}.html".format(config.output_correctness_table),
            menuLimit=5000,
            overwrite=True,
            rows=[config.id_col_name] + error_flag_columns,
            cols=[Constants.DATE_TIME_REPORT_COL],
            vals=[Constants.IS_ERROR_COL + Constants.ROW_ERROR_SUFFIX],
            aggregatorName="Sum",
            rendererName="Table Barchart",
            rowOrder="value_z_to_a",
        )

        pivot_ui(
            pd_completeness_df,
            outfile_path="{}.html".format(config.output_completeness_table),
            menuLimit=5000,
            overwrite=True,
            rows=[Constants.OVER_ALL_COUNT_COL],
            cols=[Constants.DATE_TIME_REPORT_COL],
            vals=[Constants.IS_ERROR_COL + Constants.OVER_ALL_COUNT_COL],
            aggregatorName="Sum",
            rendererName="Table Barchart",
            rowOrder="value_z_to_a",
        )

        pivot_ui(
            comparison_df,
            outfile_path="{}.html".format(config.output_comparison_table),
            menuLimit=5000,
            overwrite=True,
            rows=comparison_rows,
            cols=[Constants.DATE_TIME_REPORT_COL],
            rendererName="Table Barchart",
            rowOrder="value_z_to_a",
        )