示例#1
0
def display_data(value):
    """Build the preview layout for the selected raw file.

    When ``value`` is ``None`` the file name stored under ``"file"`` in
    ``db`` is used instead.  Before returning a layout, the resolved file
    name and its detected format are written back to ``db`` under the keys
    ``"file"`` and ``"format"`` (this also happens for unsupported formats).

    Args:
        value: Name of the selected file, or ``None`` when nothing is
            selected.

    Returns:
        ``""`` when neither ``value`` nor a stored file name is available;
        a list of components for csv/txt and xls/xlsx files; otherwise the
        string ``"Format Not Supported!!"``.
    """
    stored_file = db.get("file")
    if value is None:
        if stored_file is None:
            return ""
        value = stored_file

    file_format = FileUtils.file_format(value)
    if file_format in ('csv', 'txt'):
        path = FileUtils.path('raw', value)
        head = DataUtils.read_text_head(path)
        table_col = [
            html.Col(style={'width': "10%"}),
            html.Col(style={'width': "90%"})
        ]
        table_header = [
            html.Thead(html.Tr([html.Th("Row No"),
                                html.Th("Data")]))
        ]
        # Row numbers shown to the user are 1-based.
        rows = [
            html.Tr([html.Td(row_no), html.Td(line)])
            for row_no, line in enumerate(head, start=1)
        ]
        table_body = [html.Tbody(rows)]
        table = dbc.Table(table_col + table_header + table_body,
                          bordered=True,
                          style=common.table_style)
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format), table,
            html.Br(), csv_properties_div
        ]
    elif file_format in ('xls', 'xlsx'):
        path = FileUtils.path('raw', value)
        # Only the sheet names are needed here; the context manager closes
        # the workbook handle instead of leaving it open until collection.
        with pd.ExcelFile(path) as xls:
            sheets = xls.sheet_names
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
            common.msg("Select Sheet:"),
            html.Div([
                dcc.Dropdown(id='xls-file-sheet',
                             options=[{
                                 'label': sheet,
                                 'value': sheet
                             } for sheet in sheets],
                             value=None,
                             multi=False)
            ],
                     style={
                         'margin': '10px',
                         'width': '50%'
                     }),
            html.Div([], id="display-xls-file")
        ]
    else:
        div = "Format Not Supported!!"
    db.put("file", value)
    db.put("format", file_format)
    return div
示例#2
0
def xls_file_sheet(value):
    """Render a preview of one sheet of the currently selected Excel file.

    The file name is read from ``db`` under ``"file"``.  When ``value`` is
    ``None`` the sheet stored under ``"sheet"`` is used; the sheet that ends
    up being displayed is written back to ``db`` under ``"sheet"``.

    Args:
        value: Name of the sheet to display, or ``None`` when no sheet is
            selected.

    Returns:
        ``[]`` when no sheet is selected or stored; otherwise a list of
        components holding a ``DataTable`` with the first ten rows of the
        sheet followed by ``xls_properties_div``.
    """
    file = db.get("file")
    stored_sheet = db.get("sheet")
    if value is None:
        if stored_sheet is None:
            return []
        value = stored_sheet

    db.put('sheet', value)
    path = FileUtils.path('raw', file)
    # Close the workbook as soon as the sheet has been parsed.
    with pd.ExcelFile(path) as xls:
        df = pd.read_excel(xls, value)
    table = html.Div([
        dash_table.DataTable(
            # 'records' is the documented orient; the old 'rows' alias is
            # deprecated and rejected by newer pandas releases.
            data=df.iloc[:10].to_dict('records'),
            columns=[{'name': col, 'id': col} for col in df.columns]
        ),
        html.Hr(),
    ])
    return [html.Br(),
            table,
            html.Br(),
            xls_properties_div]
示例#3
0
def apply_file_properties(n):
    """Load the selected file with the stored properties and preview it.

    Reads ``"file"``, ``"format"``, ``"file_separator"`` and
    ``"file_header"`` from ``db`` (plus ``"sheet"`` for Excel files), loads
    the data through ``DataUtils`` and stores the resulting frame in ``db``
    under ``"raw_data"``.  For csv/txt files a missing separator defaults to
    ``','`` and that default is written back to ``db``.

    Args:
        n: Not used by this function (presumably the click count of the
            triggering button -- confirm against the callback decorator).

    Returns:
        ``None`` when no format is stored; an error message component when
        the stored format is not supported or no header has been selected;
        otherwise a list with a status message, a table of the first ten
        rows and the "Clean & Save" button container.
    """
    file = db.get("file")
    file_format = db.get("format")
    sep = db.get("file_separator")
    header = db.get("file_header")

    if file_format is None:
        return None
    if file_format not in ('csv', 'txt', 'xls', 'xlsx'):
        # Previously this case fell through every branch and crashed on
        # ``df.head`` with ``df`` still None and ``msg`` unbound.
        return common.error_msg('Format Not Supported!!')
    if header is None:
        return common.error_msg('Please Select Header!!')

    if file_format in ('csv', 'txt'):
        if sep is None:
            sep = ','
            db.put("file_separator", sep)
        path = FileUtils.path('raw', file)
        df = DataUtils.read_csv(path, sep, header)
        msg = ("Following Properties Applied. Separator=" + sep +
               " Header=" + str(header))
    else:
        # Only 'xls' / 'xlsx' can reach this branch.
        path = FileUtils.path('raw', file)
        sheet = db.get("sheet")
        df = DataUtils.read_xls(path, sheet, header)
        msg = "Following Properties Applied. Header=" + str(header)

    table = dbc.Table.from_dataframe(df.head(10),
                                     striped=True,
                                     bordered=True,
                                     hover=True,
                                     style=common.table_style)
    button = dbc.Button("Clean & Save", color="primary", id='clean-save-file')
    div = [
        common.msg(msg), table,
        html.Div(
            [button, html.Br(),
             html.Div([], id="cleaned-saved-file")],
            style={
                'padding': '10px',
                'textAlign': 'center'
            })
    ]
    db.put("raw_data", df)
    return div
示例#4
0
def clean_save_file(n):
    """Clean the stored raw data, save it as CSV and report statistics.

    Reads ``"raw_data"``, ``"file"``, ``"sheet"`` and ``"tags"`` from ``db``.
    The frame is passed to ``data_cleaning``; the cleaned frame is written to
    the path ``FileUtils.path('clean', <name>)`` where ``<name>`` is the file
    name (with the sheet appended when one is stored) cut at its first dot.
    The entry for that name in ``tags`` is incremented, or set to 1 when
    absent.

    Args:
        n: ``None`` means there is nothing to do.

    Returns:
        ``None`` when ``n`` or the stored raw data is ``None``; an error
        message component when any step raises; otherwise a ``Div`` with a
        success message, a statistics table and a schema table.
    """
    # Team 2 API integration
    raw_df = db.get("raw_data")
    file = db.get("file")
    sheet = db.get("sheet")
    tags = db.get('tags')

    if n is None or raw_df is None:
        return None

    try:
        _, cleaned_df, _, stats = data_cleaning(raw_df)

        if sheet is not None:
            file = FileUtils.append_file_name(file, sheet)
        file = file.split('.')[0]
        cleaned_df.to_csv(FileUtils.path('clean', file), index=False)

        # Tag the cleaned data: bump the counter kept per cleaned file name.
        tags[file] = tags[file] + 1 if file in tags else 1

        # Single-row frame: one column per cleaned column, holding its type.
        schema_df = pd.DataFrame(columns=stats['col_name'])
        schema_df.loc[0] = stats['col_type']

        summary_columns = [
            'Tag', 'Total no of Records', 'Cleaned no of Records',
            'Defective no of Records'
        ]
        summary_df = pd.DataFrame(columns=summary_columns)
        summary_df.loc[0] = [
            'Tag ' + str(tags[file]),
            stats['row_total'],
            stats['row_cleaned'],
            stats['row_defect'],
        ]

        table_options = dict(striped=True,
                             bordered=True,
                             hover=True,
                             style=common.table_style)
        children = [
            common.success_msg("File is Cleaned & Saved Successfully!!"),
            html.H2('Cleaned Data Statistic'),
            dbc.Table.from_dataframe(summary_df, **table_options),
            html.H2('Cleaned Data Schema'),
            dbc.Table.from_dataframe(schema_df, **table_options),
        ]
        return html.Div(children, style={'margin': '10px'})
    except Exception as e:
        return common.error_msg("Data Cleansing API Error: " + str(e))
示例#5
0
 def read(dir: str, filename: str):
     """Return a short preview of a file, depending on its format.

     The format is taken from ``FileUtils.file_format(filename)`` and the
     location from ``FileUtils.path(dir, filename)``.

     Args:
         dir: Directory key passed through to ``FileUtils.path``.
         filename: Name of the file to preview.

     Returns:
         For ``csv``/``txt`` files, a list with at most ``N`` stripped lines
         from the start of the file (fewer when the file is shorter);
         ``None`` for ``jpeg``/``jpg``/``gif`` files; otherwise the string
         ``"Format Not Supported!!"``.
     """
     # Function-scope import so the block does not depend on the file header.
     from itertools import islice

     file_format = FileUtils.file_format(filename)
     path = FileUtils.path(dir, filename)
     if file_format in ('csv', 'txt'):
         with open(path) as text_file:
             # islice simply stops at end of file; calling next() N times
             # raised StopIteration for files with fewer than N lines.
             # NOTE(review): N is defined outside this block and is assumed
             # to be a non-negative int -- confirm.
             return [line.strip() for line in islice(text_file, N)]
     if file_format in ('jpeg', 'jpg', 'gif'):
         # Image formats are recognised but no preview is produced.
         return None
     return "Format Not Supported!!"
示例#6
0
def xls_file_sheet(value):
    """Render a preview table for one sheet of the selected Excel file.

    The file name is read from ``db`` under ``"file"``.  When ``value`` is
    ``None`` the sheet stored under ``"sheet"`` is used; the sheet that ends
    up being displayed is written back to ``db`` under ``"sheet"``.

    Args:
        value: Name of the sheet to display, or ``None`` when no sheet is
            selected.

    Returns:
        ``[]`` when no sheet is selected or stored; otherwise a list of
        components holding a table with the first ten rows of the sheet
        followed by ``xls_properties_div``.
    """
    file = db.get("file")
    stored_sheet = db.get("sheet")
    if value is None:
        if stored_sheet is None:
            return []
        value = stored_sheet

    db.put('sheet', value)
    path = FileUtils.path('raw', file)
    # Close the workbook deterministically once the sheet has been parsed
    # instead of leaving the handle open until garbage collection.
    with pd.ExcelFile(path) as xls:
        df = pd.read_excel(xls, value)
    table = html.Div([
        dbc.Table.from_dataframe(df.head(10),
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
        html.Hr(),
    ])
    return [html.Br(), table, html.Br(), xls_properties_div]
示例#7
0
def linear_regression(n):
    """Build the linear-regression page for the selected cleaned file.

    Reads the file name from ``db`` under ``'lr.file'`` (``'empty'`` when
    nothing is stored), loads it from the ``'clean'`` location with
    ``pd.read_csv`` and rebinds the module-level ``df_cleaned`` to the
    loaded frame.

    Args:
        n: Not used by this function.

    Returns:
        A list of components: data preview, variable selection with scatter
        plot, result placeholders, plots, ANOVA placeholder, prediction form
        and save-model form.
    """
    global df_cleaned

    file = db.get('lr.file')
    file = 'empty' if file is None else file
    df_cleaned = pd.read_csv(FileUtils.path('clean', file))
    preview_df = df_cleaned.head(10).round(4)
    columns = df_cleaned.columns

    def column_dropdown(component_id, multi):
        # Every dropdown on this page offers the cleaned frame's columns.
        return dcc.Dropdown(
            id=component_id,
            options=[{'label': col, 'value': col} for col in columns],
            multi=multi)

    data_section = html.Div(children=[
        html.H2(children='Cleaned Data: ' + file),
        html.H2(children='Tag: Tag ' + str(db.get('tags')[file])),
        dbc.Table.from_dataframe(preview_df,
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
    ])

    selection_panel = html.Div(
        [
            html.Div(id='ordered-df', style={'display': 'none'}),
            html.Hr(),
            html.Label('Select X-axis variable for scatter plot'),
            column_dropdown('x-var-plot', False),
            html.Label('Select Y-axis variable for scatter plot'),
            column_dropdown('y-var-plot', False),
            html.Br(),
            html.H2('Perform Linear Regression'),
            html.Label('Select X variable from Dropdown'),
            column_dropdown('x-var-selection', True),
            html.Label('Select Y variable from Dropdown'),
            column_dropdown('y-var-selection', False),
        ],
        style={'width': '48%', 'display': 'inline-block'})

    scatter_panel = html.Div(
        [
            html.Label('Scatter Plot'),
            dcc.Graph(id='scatter-plot'),
        ],
        style={'width': '48%', 'float': 'right', 'display': 'inline-block'})

    results_section = html.Div([
        html.Div([], id='linear-regression-status'),
        html.Br(),
        html.H2('Statistics Summary Table'),
        html.Table(id='stats_table'),
        html.H2('Linear Regression Coefficients'),
        html.Table(id='coeff_table'),
        html.H2('Plot'),
    ])

    plots_section = html.Div([
        dcc.Graph(id='lr-y-ycap-plot', figure=y_ycap_fig),
        dcc.Graph(id='lr-error-plot', figure=error_fig),
        html.Div([], id='lr-error-mean'),
    ])

    anova_section = html.Div([
        html.Hr(),
        html.H2('ANOVA Table'),
        html.Div([], id='lr-anova-table'),
    ])

    predict_section = html.Div([
        html.Hr(),
        dbc.Label('Predict Data (pass comma separated) Dependent Variables'),
        dbc.Input(id="lr-predict-data", placeholder="Model Name", type="text"),
        html.Br(),
        dbc.Button("Predict", color="primary", id='lr-predict'),
        html.Div([], id='lr-predict-display'),
        html.Div([], id='lr-predict-data-do-nothing'),
    ])

    save_section = html.Div([
        html.Hr(),
        dbc.Label('Save Model'),
        dbc.Input(id="lr-save-model", placeholder="Model Name", type="text"),
        html.Br(),
        dbc.Button("Save", color="primary", id='lr-save'),
        html.Div([], id='lr-save-display'),
        html.Div([], id='lr-save-model-do-nothing'),
    ])

    return [
        data_section,
        html.Hr(),
        html.H3(children='Variable Selection and Plotting'),
        html.Div([selection_panel, scatter_panel]),
        html.Hr(),
        results_section,
        html.Br(),
        plots_section,
        anova_section,
        predict_section,
        save_section,
    ]
示例#8
0
import pandas as pd

from dataanalytics.ux.app import app
from dataanalytics.ux.apps import common
from dataanalytics.ux.apps.common import *
from dataanalytics.framework.database import db
from dataanalytics.framework.file_utils import FileUtils
from dataanalytics.framework.data_utils import DataUtils
from dataanalytics.stats_linear_regression.linear_regression import LinearRegression
from dataanalytics.stat_anova.anova import get_anova

# Load the cleaned dataset chosen for linear regression when this module is
# imported; the name 'empty' is used when nothing is stored under 'lr.file'.
file = db.get('lr.file')
file = 'empty' if file is None else file
path = FileUtils.path('clean', file)
df_cleaned = pd.read_csv(path)

# Figure without traces for the actual-vs-predicted plot.
# NOTE(review): `go` is not imported explicitly in this file; presumably it
# comes from the star import of `common` -- confirm.
y_ycap_title = go.Layout(
    title='Actual vs Predicted Y Plot',
    hovermode='closest',
    xaxis=dict(title='Sequence of data points'),
    yaxis=dict(title='y,ŷ'),
)
y_ycap_fig = go.Figure(data=[], layout=y_ycap_title)

# Figure without traces for the error plot.
error_title = go.Layout(
    title='Error Plot',
    hovermode='closest',
    xaxis=dict(title='Sequence of data points'),
    yaxis=dict(title='Error = y - ŷ'),
)
error_fig = go.Figure(data=[], layout=error_title)

layout = html.Div(children=[