def display_data(value):
    """Build the preview layout for the selected raw file.

    When ``value`` is ``None`` the file name stored under the ``"file"`` key
    of ``db`` is used instead. The resolved file name and its detected format
    are written back to ``db`` (keys ``"file"`` and ``"format"``) before
    returning, including when the format is not supported.

    Args:
        value: Name of the selected file, or ``None`` to fall back to the
            file name stored in ``db``.

    Returns:
        ``""`` when no file is selected and none is stored; the string
        ``"Format Not Supported!!"`` for formats other than csv/txt/xls/xlsx;
        otherwise a list of layout components (a head table for csv/txt, a
        sheet selector for xls/xlsx).
    """
    stored_file = db.get("file")
    if value is None:
        if stored_file is None:
            return ""
        value = stored_file

    # Named ``file_format`` so the ``format`` builtin is not shadowed.
    file_format = FileUtils.file_format(value)

    if file_format in ('csv', 'txt'):
        path = FileUtils.path('raw', value)
        head = DataUtils.read_text_head(path)
        table_col = [
            html.Col(style={'width': "10%"}),
            html.Col(style={'width': "90%"})
        ]
        table_header = [
            html.Thead(html.Tr([html.Th("Row No"), html.Th("Data")]))
        ]
        # Row numbers shown to the user are 1-based.
        rows = [
            html.Tr([html.Td(row_no), html.Td(line)])
            for row_no, line in enumerate(head, start=1)
        ]
        table_body = [html.Tbody(rows)]
        table = dbc.Table(table_col + table_header + table_body,
                          bordered=True,
                          style=common.table_style)
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
            table,
            html.Br(),
            csv_properties_div
        ]
    elif file_format in ('xls', 'xlsx'):
        path = FileUtils.path('raw', value)
        # Only the sheet names are needed; close the workbook right away so
        # the underlying file handle is not leaked.
        with pd.ExcelFile(path) as workbook:
            sheets = workbook.sheet_names
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
            common.msg("Select Sheet:"),
            html.Div([
                dcc.Dropdown(id='xls-file-sheet',
                             options=[{
                                 'label': sheet,
                                 'value': sheet
                             } for sheet in sheets],
                             value=None,
                             multi=False)
            ],
                     style={
                         'margin': '10px',
                         'width': '50%'
                     }),
            html.Div([], id="display-xls-file")
        ]
    else:
        div = "Format Not Supported!!"

    db.put("file", value)
    db.put("format", file_format)
    return div
def apply_file_properties(n):
    """Load the selected file with the stored properties and preview it.

    Reads the file name, format, separator and header setting from ``db``
    (keys ``"file"``, ``"format"``, ``"file_separator"``, ``"file_header"``),
    loads the file into a data frame, stores it under ``"raw_data"`` and
    returns a layout showing its first 10 rows plus a "Clean & Save" button.

    Args:
        n: Not used by the function body (presumably the click count of the
            triggering control - confirm against the callback registration).

    Returns:
        ``None`` when no format is stored; an error message component when
        the stored format is unsupported or no header has been selected;
        otherwise a list of layout components.
    """
    file_name = db.get("file")
    # Named ``file_format`` so the ``format`` builtin is not shadowed.
    file_format = db.get("format")
    sep = db.get("file_separator")
    header = db.get("file_header")

    if file_format is None:
        return None

    is_text = file_format in ('csv', 'txt')
    is_excel = file_format in ('xls', 'xlsx')
    if not (is_text or is_excel):
        # An unsupported format can be stored in ``db``; without this guard
        # the code below would fail on ``df.head`` with ``df`` still None.
        return common.error_msg('Format Not Supported!!')
    if header is None:
        return common.error_msg('Please Select Header!!')

    if is_text:
        if sep is None:
            # Default to comma and persist the choice.
            sep = ','
            db.put("file_separator", sep)
        path = FileUtils.path('raw', file_name)
        df = DataUtils.read_csv(path, sep, header)
        msg = ("Following Properties Applied. Separator=" + sep +
               " Header=" + str(header))
    else:
        path = FileUtils.path('raw', file_name)
        sheet = db.get("sheet")
        df = DataUtils.read_xls(path, sheet, header)
        msg = "Following Properties Applied. Header=" + str(header)

    table = dbc.Table.from_dataframe(df.head(10),
                                     striped=True,
                                     bordered=True,
                                     hover=True,
                                     style=common.table_style)
    button = dbc.Button("Clean & Save", color="primary", id='clean-save-file')
    div = [
        common.msg(msg),
        table,
        html.Div(
            [button,
             html.Br(),
             html.Div([], id="cleaned-saved-file")],
            style={
                'padding': '10px',
                'textAlign': 'center'
            })
    ]
    db.put("raw_data", df)
    return div
def stats_table_and_hor_regression(json_ordered_data, hor_order):
    """Fit a higher order regression and build its tables and plots.

    The first column of the data is used as x and the second as y; rows are
    sorted by x before fitting. Intermediate results are stored in ``db``
    under ``"hor.*"`` keys (x_col, y_col, params, ycap, order, error_mean,
    summary, anova).

    Args:
        json_ordered_data: Data frame serialised as JSON with
            ``orient='split'``, or ``None``.
        hor_order: Order passed to ``Building_model_equation``, or ``None``.

    Returns:
        An 8-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading, ANOVA
        div, independent-variable heading. When an input is ``None`` or the
        fit raises, the tables are empty, the figures are the module-level
        ``y_ycap_fig`` / ``error_fig`` and the last three items are ``""``.
    """
    if json_ordered_data is None or hor_order is None:
        return (common.msg(None),
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])),
                y_ycap_fig, error_fig, "", "", "")

    dff = pd.read_json(json_ordered_data, orient='split')
    col = list(dff.columns)
    x_col = [col[0]]
    y_col = col[1]
    dff = dff.sort_values(by=x_col)
    x = list(dff[col[0]])
    y = list(dff[col[1]])

    ## Team 4 API Integration
    try:
        db.put("hor.x_col", x_col)
        db.put("hor.y_col", y_col)
        (ycap, params) = Building_model_equation(x, y, hor_order)
        params = params.tolist()
        db.put("hor.params", params)
        db.put("hor.ycap", ycap)
        db.put("hor.order", hor_order)

        # Mean of the signed residuals. The accumulator starts at zero so
        # the reported mean is not offset by a constant.
        error = 0.0
        for i in range(len(y)):
            error += y[i] - ycap[i]
        error_mean = error / len(y)
        db.put("hor.error_mean", error_mean)

        ## Team 3 API call for Summary Statistics
        model = LinearRegression()
        # Only the summary is used; the fitted parameters and predictions of
        # this auxiliary linear fit are discarded.
        (summary, params_ignore, ycap_ignore) = model.fit([x], y)
        db.put("hor.summary", summary)
    except Exception as e:
        return (common.error_msg("Higher Order Regression API Error: " +
                                 str(e)),
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])),
                y_ycap_fig, error_fig, "", "", "")

    df_stats = common.get_stats_df(summary, x_col, y_col)
    table1 = dbc.Table.from_dataframe(df_stats,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)
    df_coeff = common.hor_get_coeff_df(params)
    table2 = dbc.Table.from_dataframe(df_coeff,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    trace_actual = go.Scatter(x=x,
                              y=y,
                              name='Y Actual',
                              mode='markers',
                              marker=dict(color='rgb(106, 181, 135)'))
    trace_predict = go.Scatter(x=x,
                               y=ycap,
                               name='Y Predicted (ŷ)',
                               line=dict(width=2, color='rgb(229, 151, 50)'))
    ydiff = [y[i] - ycap[i] for i in range(len(y))]
    trace_error = go.Scatter(x=x,
                             y=ydiff,
                             line=dict(width=2, color='rgb(236, 10, 15)'))

    x_title = "x (" + str(x_col[0]) + ")"
    y_title = "y,ŷ(" + str(y_col) + ")"
    y_ycap_title = go.Layout(title='Actual vs Predicted Y Plot',
                             hovermode='closest',
                             xaxis={'title': x_title},
                             yaxis={'title': y_title})
    error_title = go.Layout(title='Error Plot',
                            hovermode='closest',
                            xaxis={'title': x_title},
                            yaxis={'title': 'Error = y - ŷ'})
    fig1 = go.Figure(data=[trace_predict, trace_actual], layout=y_ycap_title)
    fig2 = go.Figure(data=[trace_error], layout=error_title)
    error_mean = html.H2('Error Mean = ' +
                         str(round(db.get('hor.error_mean'), 4)))

    ## Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('hor.anova', anova)
    anova_div = common.get_anova_div(anova)

    # Column labels are not guaranteed to be strings, so stringify them
    # before joining.
    independent_var = ','.join(str(name) for name in x_col)
    return (common.success_msg(
        "Higher Order Regression API Exceuted Successfully!!"), table1,
            table2, fig1, fig2, error_mean, anova_div,
            html.H2(independent_var))
def stats_table_and_linear_regression(json_ordered_data):
    """Fit the linear regression model and build its tables and plots.

    The last column of the data is the dependent variable; every other
    column is an independent variable. The model and its results are stored
    in ``db`` under ``"lr.*"`` keys (model, x_col, y_col, summary, params,
    ycap, error_mean, anova).

    Args:
        json_ordered_data: Data frame serialised as JSON with
            ``orient='split'``, or ``None``.

    Returns:
        A 7-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading, ANOVA
        div. When the input is ``None`` or the fit raises, the tables are
        empty, the figures are the module-level ``y_ycap_fig`` /
        ``error_fig`` and the last two items are ``""``.
    """
    if json_ordered_data is None:
        return (common.msg(None),
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])),
                y_ycap_fig, error_fig, "", "")

    frame = pd.read_json(json_ordered_data, orient='split')
    columns = list(frame.columns)
    y_col = columns[-1]
    x_col = columns[:-1]
    y = list(frame[y_col])
    # One list of values per independent variable, in column order.
    data = [frame[name].values.tolist() for name in x_col]

    ## Team 3 API Integration
    try:
        model = LinearRegression()
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        summary, params, ycap = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        error_mean = model.model_stats()['mean']
        db.put("lr.error_mean", error_mean)
    except Exception as err:
        failure = common.error_msg("Linear Regression API Error: " + str(err))
        return (failure,
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])),
                y_ycap_fig, error_fig, "", "")

    stats_frame = common.get_stats_df(summary, x_col, y_col)
    table1 = dbc.Table.from_dataframe(stats_frame,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)
    coeff_frame = common.get_coeff_df(params, x_col)
    table2 = dbc.Table.from_dataframe(coeff_frame,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    # All traces are plotted against the position of each data point.
    point_count = len(y)
    trace_1 = go.Scatter(x=list(range(point_count)),
                         y=ycap,
                         name='Y Predicted (ŷ)',
                         line={'width': 2, 'color': 'rgb(229, 151, 50)'})
    trace_2 = go.Scatter(x=list(range(point_count)),
                         y=y,
                         name='Y Actual',
                         line={'width': 2, 'color': 'rgb(106, 181, 135)'})
    ydiff = [actual - ycap[pos] for pos, actual in enumerate(y)]
    trace_3 = go.Scatter(x=list(range(point_count)),
                         y=ydiff,
                         line={'width': 2, 'color': 'rgb(236, 10, 15)'})

    fig1 = go.Figure(data=[trace_1, trace_2], layout=y_ycap_title)
    fig2 = go.Figure(data=[trace_3], layout=error_title)

    rounded_mean = round(db.get('lr.error_mean'), 4)
    error_mean = html.H2('Error Mean = ' + str(rounded_mean))

    ## Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    status = common.success_msg(
        "Linear Regression API Exceuted Successfully!!")
    return (status, table1, table2, fig1, fig2, error_mean, anova_div)
def stats_table_and_linear_regression(json_ordered_data):
    """Fit the linear regression model and build its tables and plots.

    The last column of the data is the dependent variable; every other
    column is an independent variable. With exactly one independent variable
    the rows are sorted by it and it is used as the plot x axis; otherwise
    the plots use the position of each data point. The model and its results
    are stored in ``db`` under ``"lr.*"`` keys (model, x_col, y_col, summary,
    params, ycap, error_mean, anova).

    Args:
        json_ordered_data: Data frame serialised as JSON with
            ``orient='split'``, or ``None``.

    Returns:
        An 8-tuple: status message, statistics table, coefficient table,
        actual-vs-predicted figure, error figure, error-mean heading, ANOVA
        div, independent-variables heading. When the input is ``None`` or
        the fit raises, the tables are empty, the figures are the
        module-level ``y_ycap_fig`` / ``error_fig`` and the last three items
        are ``""``.
    """
    if json_ordered_data is None:
        return (common.msg(None),
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])),
                y_ycap_fig, error_fig, "", "", "")

    dff = pd.read_json(json_ordered_data, orient='split')
    col = list(dff.columns)
    x_col = col[:-1]
    y_col = col[-1]
    if len(x_col) == 1:
        # Sort so the single-variable plot is drawn left to right along x.
        dff = dff.sort_values(by=x_col)
    # One list of values per independent variable, in column order.
    data = [dff[name].values.tolist() for name in x_col]
    y = list(dff[col[-1]])

    ## Team 3 API Integration
    try:
        model = LinearRegression()
        db.put("lr.model", model)
        db.put("lr.x_col", x_col)
        db.put("lr.y_col", y_col)
        (summary, params, ycap) = model.fit(data, y)
        db.put("lr.summary", summary)
        db.put("lr.params", params)
        db.put("lr.ycap", ycap)
        error_mean = model.model_stats()['mean']
        db.put("lr.error_mean", error_mean)
    except Exception as e:
        return (common.error_msg("Linear Regression API Error: " + str(e)),
                generate_table(pd.DataFrame(columns=[])),
                generate_table(pd.DataFrame(columns=[])),
                y_ycap_fig, error_fig, "", "", "")

    df_stats = common.get_stats_df(summary, x_col, y_col)
    table1 = dbc.Table.from_dataframe(df_stats,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)
    df_coeff = common.get_coeff_df(params, x_col)
    table2 = dbc.Table.from_dataframe(df_coeff,
                                      striped=True,
                                      bordered=True,
                                      hover=True,
                                      style=common.table_style)

    if len(data) == 1:
        trace_x = data[0]
        x_title = "x (" + str(x_col[0]) + ")"
        trace_actual = go.Scatter(x=trace_x,
                                  y=y,
                                  name='Y Actual',
                                  mode='markers',
                                  marker=dict(color='rgb(106, 181, 135)'))
    else:
        trace_x = list(range(len(y)))
        x_title = 'Sequence of data points'
        trace_actual = go.Scatter(x=trace_x,
                                  y=y,
                                  name='Y Actual',
                                  line=dict(width=2,
                                            color='rgb(106, 181, 135)'))
    trace_predict = go.Scatter(x=trace_x,
                               y=ycap,
                               name='Y Predicted (ŷ)',
                               line=dict(width=2, color='rgb(229, 151, 50)'))
    ydiff = [y[i] - ycap[i] for i in range(len(y))]
    trace_error = go.Scatter(x=trace_x,
                             y=ydiff,
                             line=dict(width=2, color='rgb(236, 10, 15)'))

    y_title = "y,ŷ(" + str(y_col) + ")"
    y_ycap_title = go.Layout(title='Actual vs Predicted Y Plot',
                             hovermode='closest',
                             xaxis={'title': x_title},
                             yaxis={'title': y_title})
    error_title = go.Layout(title='Error Plot',
                            hovermode='closest',
                            xaxis={'title': x_title},
                            yaxis={'title': 'Error = y - ŷ'})
    fig1 = go.Figure(data=[trace_predict, trace_actual], layout=y_ycap_title)
    fig2 = go.Figure(data=[trace_error], layout=error_title)
    error_mean = html.H2('Error Mean = ' +
                         str(round(db.get('lr.error_mean'), 4)))

    ## Team 5 API Integration
    anova = get_anova(y, ycap, len(params))
    db.put('lr.anova', anova)
    anova_div = common.get_anova_div(anova)

    # Column labels are not guaranteed to be strings, so stringify them
    # before joining.
    independent_var = ','.join(str(name) for name in x_col)
    return (
        common.success_msg("Linear Regression API Exceuted Successfully!!"),
        table1, table2, fig1, fig2, error_mean, anova_div,
        html.H2(independent_var))