def display_data(value):
    """Display the head of the selected file.

    Args:
        value: Name of the selected file. When ``None``, the name stored
            under the ``"file"`` key of ``db`` is used instead.

    Returns:
        ``""`` when neither ``value`` nor a stored file name is available;
        a list of components for csv/txt/xls/xlsx files; otherwise the
        string ``"Format Not Supported!!"``.

    Side effects:
        Stores the resolved file name and its format in ``db`` under the
        ``"file"`` and ``"format"`` keys (also for unsupported formats).
    """
    if value is None:
        value = db.get("file")
    if value is None:
        return ""

    file_format = FileUtils.file_format(value)

    if file_format in ('csv', 'txt'):
        path = FileUtils.path('raw', value)
        head = DataUtils.read_text_head(path)
        # Two-column table: a 1-based row number next to the raw line.
        rows = [
            html.Tr([html.Td(row_no), html.Td(line)])
            for row_no, line in enumerate(head, start=1)
        ]
        table = dbc.Table(
            [
                html.Col(style={'width': "10%"}),
                html.Col(style={'width': "90%"}),
                html.Thead(html.Tr([html.Th("Row No"), html.Th("Data")])),
                html.Tbody(rows),
            ],
            bordered=True,
            style=common.table_style)
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
            table,
            html.Br(),
            csv_properties_div
        ]
    elif file_format in ('xls', 'xlsx'):
        path = FileUtils.path('raw', value)
        # Only the sheet names are needed; close the workbook right away
        # instead of leaving the file handle open.
        with pd.ExcelFile(path) as xls:
            sheets = xls.sheet_names
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
            common.msg("Select Sheet:"),
            html.Div([
                dcc.Dropdown(id='xls-file-sheet',
                             options=[{
                                 'label': sheet,
                                 'value': sheet
                             } for sheet in sheets],
                             value=None,
                             multi=False)
            ],
                     style={
                         'margin': '10px',
                         'width': '50%'
                     }),
            html.Div([], id="display-xls-file")
        ]
    else:
        div = "Format Not Supported!!"

    db.put("file", value)
    db.put("format", file_format)
    return div
def xls_file_sheet(value):
    """Preview the first rows of the selected Excel sheet.

    Args:
        value: Name of the selected sheet. When ``None``, the sheet stored
            under the ``"sheet"`` key of ``db`` is used instead.

    Returns:
        ``[]`` when no sheet is selected or stored; otherwise a list of
        components containing a ``DataTable`` with the first 10 rows,
        followed by ``xls_properties_div``.

    Side effects:
        Stores the resolved sheet name in ``db`` under ``"sheet"``.
    """
    file_name = db.get("file")
    if value is None:
        value = db.get("sheet")
    if value is None:
        return []

    db.put('sheet', value)
    path = FileUtils.path('raw', file_name)
    # Close the workbook as soon as the sheet is loaded.
    with pd.ExcelFile(path) as xls:
        df = pd.read_excel(xls, value)

    table = html.Div([
        dash_table.DataTable(
            # 'records' is the documented orient name; the former 'rows'
            # spelling is rejected by current pandas releases.
            data=df.iloc[:10].to_dict('records'),
            columns=[{'name': col, 'id': col} for col in df.columns]
        ),
        html.Hr(),
    ])
    return [html.Br(), table, html.Br(), xls_properties_div]
def apply_file_properties(n):
    """Parse the selected file with the stored properties and preview it.

    Reads the file name, format, separator and header setting from ``db``
    (keys ``"file"``, ``"format"``, ``"file_separator"``, ``"file_header"``).

    Args:
        n: Not used by the body (presumably the click count of the
            triggering button -- confirm against the callback wiring).

    Returns:
        ``None`` when no format is stored; an error message component when
        the format is unsupported or no header has been selected; otherwise
        a list with a status message, a table of the first 10 rows and a
        "Clean & Save" button.

    Side effects:
        On success stores the parsed DataFrame in ``db`` under
        ``"raw_data"``; for csv/txt files with no stored separator, stores
        the default ``','`` under ``"file_separator"``.
    """
    text_formats = ('csv', 'txt')
    excel_formats = ('xls', 'xlsx')

    file_name = db.get("file")
    file_format = db.get("format")
    sep = db.get("file_separator")
    header = db.get("file_header")

    if file_format is None:
        return None
    if file_format not in text_formats + excel_formats:
        # Any other stored format used to fall through with no DataFrame
        # and crash on ``df.head``; report it instead.
        return common.error_msg('Format Not Supported!!')
    if header is None:
        return common.error_msg('Please Select Header!!')

    path = FileUtils.path('raw', file_name)
    if file_format in text_formats:
        if sep is None:
            sep = ','
            db.put("file_separator", sep)
        df = DataUtils.read_csv(path, sep, header)
        msg = "Following Properties Applied. Separator=" + sep + " Header=" + str(
            header)
    else:
        sheet = db.get("sheet")
        df = DataUtils.read_xls(path, sheet, header)
        msg = "Following Properties Applied. Header=" + str(header)

    table = dbc.Table.from_dataframe(df.head(10),
                                     striped=True,
                                     bordered=True,
                                     hover=True,
                                     style=common.table_style)
    button = dbc.Button("Clean & Save", color="primary", id='clean-save-file')
    div = [
        common.msg(msg),
        table,
        html.Div(
            [button, html.Br(), html.Div([], id="cleaned-saved-file")],
            style={
                'padding': '10px',
                'textAlign': 'center'
            })
    ]
    db.put("raw_data", df)
    return div
def clean_save_file(n):
    """Clean the stored raw data, save it as CSV and report statistics.

    Args:
        n: Value supplied by the caller; nothing happens while it is
            ``None`` (presumably the button click count -- confirm).

    Returns:
        ``None`` when ``n`` is ``None`` or no raw data is stored; an error
        message component when any step raises; otherwise a ``Div`` with a
        success message, a statistics table and a schema table.

    Side effects:
        Writes the cleaned DataFrame to the path returned by
        ``FileUtils.path('clean', <name>)`` and increments the tag counter
        for that name in the mapping stored under ``db`` key ``'tags'``.
    """
    ## Team 2 API Integration
    raw_df = db.get("raw_data")
    file_name = db.get("file")
    sheet = db.get("sheet")
    tags = db.get('tags')

    if n is None or raw_df is None:
        return None

    try:
        # Only the cleaned frame and the statistics are used here.
        _, cleaned_df, _, stats = data_cleaning(raw_df)

        if sheet is not None:
            file_name = FileUtils.append_file_name(file_name, sheet)
        # NOTE(review): the extension strip is applied for every file, not
        # only when a sheet is set -- confirm against the intended nesting.
        file_name = file_name.split('.')[0]
        cleaned_df.to_csv(FileUtils.path('clean', file_name), index=False)

        ### Tag the cleaned data ###
        # NOTE(review): the counter is updated in place; this assumes
        # db.get returns the stored mapping itself -- confirm.
        tags[file_name] = tags.get(file_name, 0) + 1

        col_df = pd.DataFrame(columns=stats['col_name'])
        col_df.loc[0] = stats['col_type']

        stat_df = pd.DataFrame(columns=[
            'Tag', 'Total no of Records', 'Cleaned no of Records',
            'Defective no of Records'
        ])
        stat_df.loc[0] = [
            'Tag ' + str(tags[file_name]), stats['row_total'],
            stats['row_cleaned'], stats['row_defect']
        ]

        return html.Div([
            common.success_msg("File is Cleaned & Saved Successfully!!"),
            html.H2('Cleaned Data Statistic'),
            dbc.Table.from_dataframe(stat_df,
                                     striped=True,
                                     bordered=True,
                                     hover=True,
                                     style=common.table_style),
            html.H2('Cleaned Data Schema'),
            dbc.Table.from_dataframe(col_df,
                                     striped=True,
                                     bordered=True,
                                     hover=True,
                                     style=common.table_style)
        ],
                        style={'margin': '10px'})
    except Exception as e:
        # UI boundary: surface any failure to the user instead of raising.
        return common.error_msg("Data Cleansing API Error: " + str(e))
def read(dir: str, filename: str):
    """Return a preview of ``filename`` located in ``dir``.

    Args:
        dir: Directory key passed to ``FileUtils.path``.
        filename: Name of the file; its format is taken from
            ``FileUtils.file_format``.

    Returns:
        For csv/txt files, a list with up to ``N`` stripped lines (fewer
        when the file is shorter); ``None`` for jpeg/jpg/gif files; the
        string ``"Format Not Supported!!"`` for anything else.
    """
    from itertools import islice

    file_format = FileUtils.file_format(filename)
    path = FileUtils.path(dir, filename)

    if file_format in ('csv', 'txt'):
        with open(path) as myfile:
            # islice stops quietly at end of file; calling next() N times
            # raised StopIteration for files shorter than N lines.
            return [line.strip() for line in islice(myfile, N)]
    if file_format in ('jpeg', 'jpg', 'gif'):
        # No preview is produced for image files.
        return None
    return "Format Not Supported!!"
def xls_file_sheet(value):
    """Preview the first rows of the selected Excel sheet.

    Args:
        value: Name of the selected sheet. When ``None``, the sheet stored
            under the ``"sheet"`` key of ``db`` is used instead.

    Returns:
        ``[]`` when no sheet is selected or stored; otherwise a list of
        components containing a table with the first 10 rows, followed by
        ``xls_properties_div``.

    Side effects:
        Stores the resolved sheet name in ``db`` under ``"sheet"``.
    """
    file_name = db.get("file")
    if value is None:
        value = db.get("sheet")
    if value is None:
        return []

    db.put('sheet', value)
    path = FileUtils.path('raw', file_name)
    # Close the workbook as soon as the sheet is loaded.
    with pd.ExcelFile(path) as xls:
        df = pd.read_excel(xls, value)

    table = html.Div([
        dbc.Table.from_dataframe(df.head(10),
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
        html.Hr(),
    ])
    return [html.Br(), table, html.Br(), xls_properties_div]
def linear_regression(n):
    """Build the linear-regression page for the selected cleaned file.

    The file name is read from ``db`` key ``'lr.file'`` (``'empty'`` when
    nothing is stored), loaded with ``pd.read_csv`` and assigned to the
    module-level ``df_cleaned``.

    Args:
        n: Not used by the body (presumably the triggering callback
            input -- confirm against the callback wiring).

    Returns:
        A list of components: data preview, variable selection with
        scatter plot, result tables, result plots, ANOVA section,
        prediction form and save-model form.
    """
    global df_cleaned

    file_name = db.get('lr.file')
    if file_name is None:
        file_name = 'empty'
    df_cleaned = pd.read_csv(FileUtils.path('clean', file_name))
    preview = df_cleaned.head(10).round(4)
    columns = df_cleaned.columns

    def column_dropdown(component_id, multi):
        """Return a dropdown offering every column of the loaded data."""
        choices = [{'label': name, 'value': name} for name in columns]
        return dcc.Dropdown(id=component_id, options=choices, multi=multi)

    # Header: file name, tag number and the first rows of the data.
    data_section = html.Div(children=[
        html.H2(children='Cleaned Data: ' + file_name),
        html.H2(children='Tag: Tag ' + str(db.get('tags')[file_name])),
        dbc.Table.from_dataframe(preview,
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style)
    ])

    # Left half: variable pickers for the scatter plot and the regression.
    selection_panel = html.Div([
        html.Div(id='ordered-df', style={'display': 'none'}),
        html.Hr(),
        html.Label('Select X-axis variable for scatter plot'),
        column_dropdown('x-var-plot', False),
        html.Label('Select Y-axis variable for scatter plot'),
        column_dropdown('y-var-plot', False),
        html.Br(),
        html.H2('Perform Linear Regression'),
        html.Label('Select X variable from Dropdown'),
        column_dropdown('x-var-selection', True),
        html.Label('Select Y variable from Dropdown'),
        column_dropdown('y-var-selection', False),
    ],
                               style={
                                   'width': '48%',
                                   'display': 'inline-block'
                               })

    # Right half: the scatter plot itself.
    scatter_panel = html.Div([
        html.Label('Scatter Plot'),
        dcc.Graph(id='scatter-plot'),
    ],
                             style={
                                 'width': '48%',
                                 'float': 'right',
                                 'display': 'inline-block'
                             })

    results_section = html.Div([
        html.Div([], id='linear-regression-status'),
        html.Br(),
        html.H2('Statistics Summary Table'),
        html.Table(id='stats_table'),
        html.H2('Linear Regression Coefficients'),
        html.Table(id='coeff_table'),
        html.H2('Plot')
    ])

    plots_section = html.Div([
        dcc.Graph(id='lr-y-ycap-plot', figure=y_ycap_fig),
        dcc.Graph(id='lr-error-plot', figure=error_fig),
        html.Div([], id='lr-error-mean')
    ])

    anova_section = html.Div([
        html.Hr(),
        html.H2('ANOVA Table'),
        html.Div([], id='lr-anova-table'),
    ])

    # NOTE(review): the prediction input reuses the "Model Name"
    # placeholder of the save form -- looks like a copy/paste; confirm.
    predict_section = html.Div([
        html.Hr(),
        dbc.Label('Predict Data (pass comma separated) Dependent Variables'),
        dbc.Input(id="lr-predict-data",
                  placeholder="Model Name",
                  type="text"),
        html.Br(),
        dbc.Button("Predict", color="primary", id='lr-predict'),
        html.Div([], id='lr-predict-display'),
        html.Div([], id='lr-predict-data-do-nothing'),
    ])

    save_section = html.Div([
        html.Hr(),
        dbc.Label('Save Model'),
        dbc.Input(id="lr-save-model",
                  placeholder="Model Name",
                  type="text"),
        html.Br(),
        dbc.Button("Save", color="primary", id='lr-save'),
        html.Div([], id='lr-save-display'),
        html.Div([], id='lr-save-model-do-nothing'),
    ])

    return [
        data_section,
        html.Hr(),
        html.H3(children='Variable Selection and Plotting'),
        html.Div([selection_panel, scatter_panel]),
        html.Hr(),
        results_section,
        html.Br(),
        plots_section,
        anova_section,
        predict_section,
        save_section,
    ]
import pandas as pd from dataanalytics.ux.app import app from dataanalytics.ux.apps import common from dataanalytics.ux.apps.common import * from dataanalytics.framework.database import db from dataanalytics.framework.file_utils import FileUtils from dataanalytics.framework.data_utils import DataUtils from dataanalytics.stats_linear_regression.linear_regression import LinearRegression from dataanalytics.stat_anova.anova import get_anova file = db.get('lr.file') if file is None: file = 'empty' path = FileUtils.path('clean', file) df_cleaned = pd.read_csv(path) y_ycap_title = go.Layout(title='Actual vs Predicted Y Plot', hovermode='closest', xaxis={'title': 'Sequence of data points'}, yaxis={'title': 'y,ŷ'}) y_ycap_fig = go.Figure(data=[], layout=y_ycap_title) error_title = go.Layout(title='Error Plot', hovermode='closest', xaxis={'title': 'Sequence of data points'}, yaxis={'title': 'Error = y - ŷ'}) error_fig = go.Figure(data=[], layout=error_title) layout = html.Div(children=[