def nlcl_scatter_plot(n):
    """Build the per-class scatter plot for the ``nlcl`` data set.

    Reads the data frame (``"nlcl.data"``) and the selected class, x-axis
    and y-axis column names from ``db`` and draws one marker trace per
    distinct value of the class column.

    Args:
        n: Not used by this function.

    Returns:
        A ``dcc.Graph`` with id ``'nlcl-x-vs-y'``, or ``None`` when the data
        frame or any of the three column selections is missing.
    """
    df = db.get("nlcl.data")
    clazz_col = db.get("nlcl.class")
    x_col = db.get("nlcl.x_axis")
    y_col = db.get("nlcl.y_axis")
    if df is None or clazz_col is None or x_col is None or y_col is None:
        return None

    traces = []
    for clazz in df[clazz_col].unique():
        # Filter once per class instead of once per plotted attribute.
        members = df[df[clazz_col] == clazz]
        traces.append(
            go.Scatter(
                x=members[x_col],
                y=members[y_col],
                text=members[clazz_col],
                mode='markers',
                opacity=0.8,
                marker={
                    'size': 15,
                    'line': {'width': 0.5, 'color': 'white'}
                },
                name=clazz
            )
        )

    graph = dcc.Graph(
        id='nlcl-x-vs-y',
        figure={
            'data': traces,
            'layout': dict(
                xaxis={'title': x_col},
                yaxis={'title': y_col},
                margin={'l': 40, 'b': 40},
                legend={'x': 0, 'y': 1},
                hovermode='closest'
            )
        }
    )
    return graph
示例#2
0
def apply_file_properties(n):
    """Load the selected raw csv/txt file using the chosen separator/header.

    Reads ``"file"``, ``"format"``, ``"file_separator"`` and
    ``"file_header"`` from ``db``. For csv/txt files the data is read with
    ``DataUtils.read_csv`` and stored under ``"data"``; a missing separator
    defaults to ``','`` and that default is written back to ``db``.

    Args:
        n: Not used by this function.

    Returns:
        ``None`` when the format is unset or is not csv/txt, an error
        message when no header option was selected, otherwise a list
        holding a confirmation message and a table of the first 10 rows.
    """
    file = db.get("file")
    file_format = db.get("format")
    sep = db.get("file_separator")
    header = db.get("file_header")

    # Only delimited text formats are handled here.
    if file_format not in ('csv', 'txt'):
        return None
    if header is None:
        return common.error_msg('Please Select Header!!')

    if sep is None:
        sep = ','
        db.put("file_separator", sep)

    frame = DataUtils.read_csv(FileUtils.path('raw', file), sep, header)
    db.put("data", frame)

    msg = "Following Properties Applied. Separator=" + sep + " Header=" + str(
        header)
    preview = dbc.Table.from_dataframe(
        frame.head(10),
        striped=True,
        bordered=True,
        hover=True,
        style=common.table_style,
    )
    return [common.msg(msg), preview]
示例#3
0
def display_data(value):
    """Show a preview of the selected file and remember the selection.

    When ``value`` is ``None`` the file previously stored under ``"file"``
    is used instead. Picking a file different from the stored one resets
    ``db`` first. The chosen file name and its format are stored under
    ``"file"`` and ``"format"`` before returning.

    Args:
        value: Name of the selected file, or ``None``.

    Returns:
        ``""`` when no file is selected or stored; for csv/txt a list with
        two messages, a two-column (row number / raw line) table, a line
        break and ``csv_properties_div``; for jpeg/jpg/gif just the two
        messages; otherwise the string ``"Format Not Supported!!"``.
    """
    stored = db.get("file")
    if value is None:
        if stored is None:
            return ""
        value = stored
    elif value != stored:
        db.reset()

    file_format = FileUtils.file_format(value)
    if file_format in ('csv', 'txt'):
        head = DataUtils.read_text_head(FileUtils.path('raw', value))
        # One table row per line of the file head, numbered from 1.
        body_rows = [
            html.Tr([html.Td(i + 1), html.Td(head[i])])
            for i in range(len(head))
        ]
        table = dbc.Table(
            [html.Col(style={'width': "10%"}), html.Col(style={'width': "90%"})]
            + [html.Thead(html.Tr([html.Th("Row No"), html.Th("Data")]))]
            + [html.Tbody(body_rows)],
            bordered=True,
            style=common.table_style,
        )
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
            table,
            html.Br(),
            csv_properties_div,
        ]
    elif file_format in ('jpeg', 'jpg', 'gif'):
        div = [
            common.msg("Selected File: " + value),
            common.msg("Selected Format: " + file_format),
        ]
    else:
        div = "Format Not Supported!!"

    db.put("file", value)
    db.put("format", file_format)
    return div
def sgd_model_predict(n_clicks):
    """Classify the user-entered feature values with the trained SGD model.

    Reads the feature names, the comma-separated prediction input, the
    model, ``yu`` and the hidden-layer count from ``db``. The numeric
    prediction is mapped back through ``'sgd.reverse_quantized_classes'``
    and stored under ``'sgd.prediction'``.

    Args:
        n_clicks: Not used by this function.

    Returns:
        ``("", "")`` when there is no prediction input or no feature list
        yet; a tuple whose first item is an error message when the number
        of entered values is wrong or the prediction raises; otherwise a
        success message naming the predicted class.
    """
    var = db.get('sgd.model_variables')
    predict_data = db.get("sgd.model_prediction_data")
    model = db.get('sgd.model')
    yu = db.get('sgd.model_yu')

    # Check for missing inputs before calling len() on the feature list.
    if predict_data is None or var is None:
        return ("", "")
    if len(predict_data.split(',')) != len(var):
        return (common.error_msg('Enter Valid Prediction Data!!'), "")
    try:
        layer = db.get("sgd.model_layer")
        feature_vector = get_predict_data_list(predict_data)
        if layer == 1:
            # The single-layer predictor is given a one-row data frame.
            df = pd.DataFrame(columns=var)
            df.loc[0] = feature_vector
            prediction = ann_predict(df, model, yu)
        elif layer == 2:
            prediction = ann_predict_h2(feature_vector, model, yu)
        else:
            # Fail with a clear message instead of an unbound-name error.
            raise ValueError("Unsupported number of hidden layers: " +
                             str(layer))
        reverse_quantized_classes = db.get('sgd.reverse_quantized_classes')
        prediction = reverse_quantized_classes[int(prediction)]
        db.put('sgd.prediction', prediction)
    except Exception as e:
        traceback.print_exc()
        return (common.error_msg("Exception during prediction: " + str(e)), "")
    # The original class label is not guaranteed to be a string.
    return common.success_msg('Predicted/Classified Class = ' +
                              str(prediction))
def sgd_predict_model_div(child):
    """Build the prediction input section for the SGD model.

    Args:
        child: Not used by this function.

    Returns:
        ``""`` when no model is stored under ``'sgd.model'``; otherwise an
        ``html.Div`` containing a heading, a hint listing the feature names
        from ``'sgd.model_variables'``, a text input, a "Predict" button
        and an empty container with id ``"sgd-prediction"``.
    """
    if db.get('sgd.model') is None:
        return ""

    # The same comma-joined feature list serves as hint and placeholder.
    feature_list = ','.join(db.get('sgd.model_variables'))

    hint = html.P(
        'Features to be Predicted (comma separated): ' + feature_list,
        style={'font-size': '16px'},
    )
    entry = dbc.Input(
        id="sgd-prediction-data",
        placeholder=feature_list,
        type="text",
    )
    return html.Div([
        html.Br(),
        html.H2('Prediction/Classification:'),
        hint,
        entry,
        html.Br(),
        dbc.Button("Predict", color="primary", id='sgd-predict'),
        html.Div([], id="sgd-prediction"),
    ])
示例#6
0
def knn_model_predict(n_clicks):
    """Classify the user-entered feature values with k-nearest-neighbours.

    Reads the class column, feature columns, ``'knn.distance'`` and the
    training split from ``db``, predicts via ``knn_predict`` and stores the
    result under ``'knn.prediction'``.

    Args:
        n_clicks: Not used by this function.

    Returns:
        A 2-tuple. ``("", "")`` when there is no prediction input or no
        feature list yet; ``(error message, "")`` when the number of
        entered values is wrong or the prediction raises; otherwise
        ``(success message, div)`` where ``div`` holds axis dropdowns, a
        "Plot" button and an empty plot container.
    """
    c = db.get('knn.model_class')
    predict_data = db.get("knn.model_prediction_data")
    var = db.get('knn.model_variables')
    k = db.get('knn.distance')
    train_df = db.get('knn.data_train')

    # Check for missing inputs before calling len() on the feature list.
    if predict_data is None or var is None:
        return ("", "")
    if len(predict_data.split(',')) != len(var):
        return (common.error_msg('Enter Valid Prediction Data!!'), "")
    try:
        cols = [] + var
        cols.append(c)
        train_dataset = train_df[cols].astype(str).values.tolist()

        # The query row gets an empty value in the class position so it has
        # the same number of entries as a training row.
        feature_vector = get_predict_data_list(predict_data)
        feature_vector.append('')

        result = knn_predict(train_dataset, [feature_vector], k)
        prediction = result[0][-1]
        db.put('knn.prediction', prediction)
    except Exception as e:
        traceback.print_exc()
        return (common.error_msg("Exception during prediction: " + str(e)), "")

    # Every column except the last one is offered as a plot axis.
    axis_columns = [*train_df.iloc[:, :-1]]

    def axis_dropdown(component_id):
        """Return a single-select dropdown listing the axis columns."""
        return dcc.Dropdown(
            id=component_id,
            options=[{'label': col, 'value': col} for col in axis_columns],
            value=None,
            multi=False
        )

    div = html.Div([
        html.Div(
            [html.H2("Predicted & Training Data Set Scatter Plot")],
            style={
                'width': '100%',
                'display': 'flex',
                'align-items': 'center',
                'justify-content': 'center'
            }),
        dbc.Row([
            dbc.Col([
                dbc.Label("Select X Axis"),
                axis_dropdown('knn-x-axis-predict'),
                html.Br(),
                dbc.Label("Select Y Axis"),
                axis_dropdown('knn-y-axis-predict'),
                html.Br(),
                dbc.Button("Plot", color="primary",
                           id='knn-predict-scatter-plot-button'),
                html.Div([], id="knn-x-axis-predict-do-nothing"),
                html.Div([], id="knn-y-axis-predict-do-nothing")
            ], md=2,
                style={'margin': '10px', 'font-size': '16px'}),
            dbc.Col([], md=9, id="knn-scatter-plot-predict")
        ]),
    ])
    return (common.success_msg('Predicted/Classified Class = ' + prediction),
            div)
示例#7
0
def dtn_model_train(n_clicks):
    """Train the decision tree and render its parameter summary.

    Requires the class column, feature list, max depth and min size to be
    set in ``db``. Stores the summary under ``'dtn.model_summary'`` and the
    tree under ``'dtn.model_instance'``.

    Args:
        n_clicks: Not used by this function.

    Returns:
        ``""`` when any required parameter is missing, an error message
        when training raises, otherwise an ``html.Div`` with the summary
        table and the string form of the tree.
    """
    c = db.get('dtn.model_class')
    var = db.get('dtn.model_variables')
    max_depth = db.get('dtn.max_depth')
    min_size = db.get('dtn.min_size')
    folds = 5

    # Once this guard passes every parameter is set, so no further
    # "select proper parameters" branch can ever be reached.
    if c is None or var is None or max_depth is None or min_size is None:
        return ""

    try:
        # NOTE(review): training always reads the bundled banknote.csv;
        # `c` and `var` are only checked for presence above.
        path = FileUtils.path('extra', 'banknote.csv')

        tree, avg_score, avg_f1_score = train(path, max_depth, min_size, folds)

        summary = {
            'Max Depth': max_depth,
            'Min Size': min_size,
            'Folds': folds,
            'Average Score': round(avg_score, 4),
            'Average F1 Score': round(avg_f1_score, 4),
        }
        summary_df = pd.DataFrame(summary.items(),
                                  columns=['Parameters', 'Value'])

        db.put('dtn.model_summary', summary)
        db.put('dtn.model_instance', tree)
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " + str(e))

    return html.Div([
        html.H2('Model Parameters & Summary:'),
        dbc.Table.from_dataframe(summary_df, striped=True, bordered=True,
                                 hover=True, style=common.table_style),
        html.Br(),
        html.H2('Tree'),
        html.H2(str(tree)),
    ])
def dt_model_predict(n_clicks):
    """Classify the user-entered feature values with the decision tree.

    Reads the feature names, the comma-separated prediction input and the
    model instance from ``db`` and stores the prediction, as a string,
    under ``'dt.prediction'``.

    Args:
        n_clicks: Not used by this function.

    Returns:
        ``("", "")`` when there is no prediction input or no feature list
        yet; a tuple whose first item is an error message when the number
        of entered values is wrong or the prediction raises; otherwise a
        success message naming the predicted class.
    """
    var = db.get('dt.model_variables')
    predict_data = db.get("dt.model_prediction_data")
    model = db.get('dt.model_instance')

    # Check for missing inputs before calling len() on the feature list.
    if predict_data is None or var is None:
        return ("", "")
    if len(predict_data.split(',')) != len(var):
        return (common.error_msg('Enter Valid Prediction Data!!'), "")
    try:
        feature_vector = get_predict_data_list(predict_data)
        # presumably -1 fills the class position of the row; verify
        # against the model's predict() implementation.
        feature_vector.append(-1)

        prediction = model.predict([feature_vector])
        prediction = str(prediction[0])
        db.put('dt.prediction', prediction)
    except Exception as e:
        traceback.print_exc()
        return (common.error_msg("Exception during prediction: " + str(e)), "")
    return common.success_msg('Predicted/Classified Class = ' + prediction)
def cl_scatter_plot(n):
    """Plot the test data per class together with the predicted point.

    Reads the test split, class column, selected axes, prediction input
    and predicted class from ``db``. The entered feature values are added
    as one extra row whose class is ``'Predicted-<class>'`` so that the
    point appears as its own trace.

    Args:
        n: Not used by this function.

    Returns:
        A ``dcc.Graph`` with id ``'cl-x-vs-y-predict'``, or ``None`` when
        the axes, class column, test data or prediction are not available.
    """
    df = db.get("cl.data_test")
    clazz_col = db.get('cl.model_class')
    x_col = db.get("cl.x_axis_predict")
    y_col = db.get("cl.y_axis_predict")
    predict_data = db.get("cl.model_prediction_data")
    prediction = db.get('cl.prediction')

    # Validate before touching the data so a missing value cannot raise.
    if clazz_col is None or x_col is None or y_col is None:
        return None
    if df is None or predict_data is None or prediction is None:
        return None

    # Work on a re-indexed copy: the frame held by db must not gain one
    # more "Predicted-..." row on every call, and a 0..n-1 index ensures
    # that label len(df) adds a new row instead of overwriting one.
    df = df.reset_index(drop=True)
    feature_vector = get_predict_data_list(predict_data)
    feature_vector.append('Predicted-' + str(prediction))
    df.loc[len(df)] = feature_vector

    traces = []
    for clazz in df[clazz_col].unique():
        # Filter once per class instead of once per plotted attribute.
        members = df[df[clazz_col] == clazz]
        traces.append(
            go.Scatter(x=members[x_col],
                       y=members[y_col],
                       text=members[clazz_col],
                       mode='markers',
                       opacity=0.8,
                       marker={
                           'size': 15,
                           'line': {
                               'width': 0.5,
                               'color': 'white'
                           }
                       },
                       name=clazz))

    graph = dcc.Graph(
        id='cl-x-vs-y-predict',
        figure={
            'data': traces,
            'layout':
            dict(
                xaxis={'title': x_col},
                yaxis={'title': y_col},
                margin={
                    'l': 40,
                    'b': 40
                },
                legend={
                    'x': 0,
                    'y': 1
                },
                hovermode='closest')
        })
    return graph
def get_confusion_matrix(df, c, var, model, yu, reverse_quantized_classes):
    """Compute per-class precision and recall for the trained network.

    Each row of ``df`` is predicted individually with ``ann_predict`` or
    ``ann_predict_h2``, chosen by the ``"sgd.model_layer"`` value in
    ``db``.

    Args:
        df: Data frame holding the feature columns and the class column.
        c: Name of the class column; its values must convert with ``int``.
        var: List of feature column names.
        model: Trained model passed through to the predictor.
        yu: Passed through to the predictor unchanged.
        reverse_quantized_classes: Lookup from integer class to the label
            shown in the ``'Class'`` column.

    Returns:
        A data frame with the columns ``'Class'``,
        ``'Total Retrieved Records'``, ``'Total Relevant Records'``,
        ``'Retrieved & Relevant'``, ``'Precision'`` and ``'Recall'``.
        Precision or recall is ``'-'`` when its denominator is zero.

    Raises:
        ValueError: If rows exist and the stored layer is neither 1 nor 2.
    """
    def new_counts():
        # t_rel: rows of this class, t_ret: rows predicted as this class,
        # rr: rows that are both.
        return {'t_rel': 0, 't_ret': 0, 'rr': 0}

    # The layer setting is the same for every row; look it up once.
    layer = db.get("sgd.model_layer")
    predict = {1: ann_predict, 2: ann_predict_h2}.get(layer)

    d = {str(int(clazz)): new_counts() for clazz in df[c].unique()}

    features = df[var]
    for i, actual in enumerate(df[c]):
        if predict is None:
            raise ValueError("Unsupported number of hidden layers: " +
                             str(layer))
        clazz = str(int(actual))
        # A one-row frame, as the predictors are called with elsewhere.
        feature_vector = features.iloc[i:i + 1]
        prediction = str(int(predict(feature_vector, model, yu)))

        d[clazz]['t_rel'] += 1
        # The model may predict a class that never occurs in `df`.
        d.setdefault(prediction, new_counts())['t_ret'] += 1
        if clazz == prediction:
            d[clazz]['rr'] += 1

    rows = []
    for k, v in d.items():
        key = reverse_quantized_classes[int(k)]
        precision = '-' if v['t_ret'] == 0 else round(v['rr'] / v['t_ret'], 4)
        recall = '-' if v['t_rel'] == 0 else round(v['rr'] / v['t_rel'], 4)
        rows.append([key, v['t_ret'], v['t_rel'], v['rr'], precision, recall])

    return pd.DataFrame(rows, columns=[
        'Class', 'Total Retrieved Records', 'Total Relevant Records',
        'Retrieved & Relevant', 'Precision', 'Recall'
    ])
示例#11
0
def dt_display_selected_file_scatter_plot(value):
    """Load the selected cleaned file and render its decision-tree page.

    When ``value`` is ``None`` the file stored under ``"dt.file"`` is used.
    The chosen file name is written back to ``"dt.file"`` and the loaded
    data frame to ``"dt.data"``.

    Args:
        value: Name of the selected cleaned file, or ``None``.

    Returns:
        A prompt message when no file is selected or stored; otherwise an
        ``html.Div`` with the file name, a table of the first 10 rows, the
        model properties section and an empty ``"dt-trained-model"``
        container.
    """
    stored = db.get("dt.file")
    file = stored if value is None else value
    if file is None:
        return common.msg("Please select a cleaned file to proceed!!")

    db.put("dt.file", file)
    df = DataUtils.read_csv(FileUtils.path('clean', file))
    db.put("dt.data", df)

    heading = common.msg("Selected cleaned file: " + file)
    preview = dbc.Table.from_dataframe(
        df.head(10).astype(str),
        striped=True,
        bordered=True,
        hover=True,
        style=common.table_style,
    )
    properties = get_dt_model_properties_div(df)
    trained_slot = html.Div([], id="dt-trained-model",
                            style={'margin': '10px'})

    return html.Div([heading, preview, html.Br(), properties, trained_slot])
示例#12
0
def knn_model_train(n_clicks):
    """Split the data, evaluate k-nearest-neighbours and render the report.

    Reads the class column, feature list, training percentage and
    ``'knn.distance'`` from ``db``. The train/test splits and the summary
    are stored under ``'knn.data_train'``, ``'knn.data_test'`` and
    ``'knn.model_summary'``.

    Args:
        n_clicks: Not used by this function.

    Returns:
        ``""`` when no parameter is set at all, an error message when the
        training percentage is missing or outside 0-100, when another
        parameter is missing or when evaluation raises; otherwise an
        ``html.Div`` with the class counts, summary, confusion matrix and
        the prediction input controls.
    """
    c = db.get('knn.model_class')
    var = db.get('knn.model_variables')
    train = db.get('knn.model_train')
    k = db.get('knn.distance')
    file = db.get("knn.file")  # read here but not used below

    if c is None and var is None and train is None and k is None:
        return ""
    if train is None or train < 0 or train > 100:
        return common.error_msg('Training % should be between 0 - 100 !!')
    if c is None or var is None or k is None:
        return common.error_msg('Select Proper Model Parameters!!')

    try:
        cols = var + [c]
        df = db.get('knn.data')[cols]

        train_df, test_df = common.split_df(df, c, train)

        # Class counts for the full set, joined with train and test counts.
        distinct_count_df = get_distinct_count_df(df, c, 'Total Count')
        for part, label in ((train_df, 'Training Count'),
                            (test_df, 'Testing Count')):
            counts = get_distinct_count_df(part, c, label)
            distinct_count_df = distinct_count_df.join(
                counts.set_index('Class'), on='Class')

        train_dataset = train_df[cols].astype(str).values.tolist()
        test_dataset = test_df[cols].astype(str).values.tolist()

        result = knn_predict(train_dataset, test_dataset, k)
        cc_percentage = calculate_predict_accuracy(result)

        summary = {
            'Total Training Data': len(train_df),
            'Total Testing Data': len(test_df),
            'Total Number of Features in Dataset': len(var),
            'Model Accuracy %': round(cc_percentage, 2),
            'Features': str(var),
        }
        summary_df = pd.DataFrame(summary.items(),
                                  columns=['Parameters', 'Value'])

        db.put('knn.data_train', train_df)
        db.put('knn.data_test', test_df)
        db.put('knn.model_summary', summary)
        confusion_df = get_confusion_matrix(result, df[c].unique())
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " + str(e))

    def as_table(frame):
        """Render a data frame with the shared table styling."""
        return dbc.Table.from_dataframe(frame, striped=True, bordered=True,
                                        hover=True, style=common.table_style)

    feature_list = ','.join(var)
    return html.Div([
        html.H2('Class Grouping in Data:'),
        as_table(distinct_count_df),
        html.H2('Model Parameters & Summary:'),
        as_table(summary_df),
        html.H2('Confusion Matrix (Precision & Recall):'),
        as_table(confusion_df),
        html.H2('Prediction/Classification:'),
        html.P('Features to be Predicted (comma separated): ' + feature_list,
               style={'font-size': '16px'}),
        dbc.Input(id="knn-prediction-data", placeholder=feature_list,
                  type="text"),
        html.Br(),
        dbc.Button("Predict", color="primary", id='knn-predict'),
        html.Div([], id="knn-prediction"),
        html.Div([], id="knn-predicted-scatter-plot"),
    ])
def sgd_model_train(n_clicks):
    """Train the neural network on a train/test split and render the report.

    Reads the class column, feature list, training percentage, learning
    rate, epoch count and hidden-layer neuron counts from ``db``. Two
    hidden layers are used when ``'sgd.no_of_neuron_h2'`` is set, otherwise
    one; the layer count is stored under ``"sgd.model_layer"`` before any
    validation. On success the splits, class mappings, model, ``yu`` and
    summary are stored in ``db``.

    Args:
        n_clicks: Not used by this function.

    Returns:
        ``""`` when none of the main parameters is set, an error message
        when the training percentage is missing or outside 0-100, when
        another parameter is missing or when training raises; otherwise an
        ``html.Div`` with the class counts, summary, convergence plot and
        confusion matrix.
    """
    c = db.get('sgd.model_class')
    var = db.get('sgd.model_variables')
    train = db.get('sgd.model_train')
    lr = db.get('sgd.model_lr')
    epoch = db.get('sgd.model_epoch')
    no_of_neuron = db.get('sgd.no_of_neuron')
    no_of_neuron_h2 = db.get('sgd.no_of_neuron_h2')

    layer = 1 if no_of_neuron_h2 is None else 2
    db.put("sgd.model_layer", layer)

    if c is None and var is None and train is None and lr is None and epoch is None:
        return ""
    if train is None or train < 0 or train > 100:
        return common.error_msg('Training % should be between 0 - 100 !!')
    if c is None or var is None or lr is None or epoch is None:
        return common.error_msg('Select Proper Model Parameters!!')

    try:
        cols = [] + var
        cols.append(c)
        df = db.get('sgd.data')
        df = df[cols]
        # Replace the class labels with integers; the reverse mapping is
        # used further down to show the original labels again.
        df, quantized_classes, reverse_quantized_classes = quantized_class(
            df, c)

        train_df, test_df = common.split_df(df, c, train)

        distinct_count_df_total = get_distinct_count_df(
            df, c, 'Total Count')
        distinct_count_df_train = get_distinct_count_df(
            train_df, c, 'Training Count')
        distinct_count_df_test = get_distinct_count_df(
            test_df, c, 'Testing Count')

        distinct_count_df = distinct_count_df_total.join(
            distinct_count_df_train.set_index('Class'), on='Class')
        distinct_count_df = distinct_count_df.join(
            distinct_count_df_test.set_index('Class'), on='Class')
        distinct_count_df['Class'] = distinct_count_df['Class'].map(
            reverse_quantized_classes)

        if layer == 1:
            ycap, loss_dict, cc_percentage, wc_percentage, model, yu = ann_training(
                train_df[var], train_df[c], no_of_neuron, lr, epoch)
            ycap, cc_percentage, wc_percentage = ann_testing(
                test_df[var], test_df[c], model, yu)
        else:
            ycap, loss_dict, cc_percentage, wc_percentage, model, yu = ann_training_h2(
                train_df[var], train_df[c], no_of_neuron, no_of_neuron_h2,
                lr, epoch)
            # Evaluate on the held-out split, as the one-layer branch does;
            # scoring the training rows would overstate the accuracy.
            ycap, cc_percentage, wc_percentage = ann_testing_h2(
                test_df[var], test_df[c], model, yu)

        summary = {}
        summary['Total Training Data'] = len(train_df)
        summary['Total Testing Data'] = len(test_df)
        summary['Total Number of Features in Dataset'] = len(var)
        summary['Total no of Layers'] = layer + 2
        summary['No of Hidden Layer'] = layer
        summary['No of Neuron in Hidden Layer 1'] = no_of_neuron
        summary['No of Neuron in Hidden Layer 2'] = no_of_neuron_h2
        summary['Activation Function'] = 'Sigmoid'
        summary['Learning rate'] = lr
        summary['Epochs'] = epoch
        summary['Model Accuracy'] = round(cc_percentage, 2)
        summary['Features'] = str(var)
        summary_df = pd.DataFrame(summary.items(),
                                  columns=['Parameters', 'Value'])

        db.put('sgd.data_train', train_df)
        db.put('sgd.data_test', test_df)
        db.put('sgd.quantized_classes', quantized_classes)
        db.put('sgd.reverse_quantized_classes', reverse_quantized_classes)
        db.put('sgd.model', model)
        db.put('sgd.model_yu', yu)
        db.put('sgd.summary', summary)
        confusion_df = get_confusion_matrix(test_df, c, var, model, yu,
                                            reverse_quantized_classes)
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " +
                                str(e))

    # Loss per epoch, as recorded by the training routine.
    trace = go.Scatter(x=loss_dict['Epoch_no'],
                       y=loss_dict['Loss'],
                       line=dict(width=2, color='rgb(106, 181, 135)'))
    convergence_title = go.Layout(title='Convergence Plot',
                                  hovermode='closest',
                                  xaxis={'title': 'Epoch'},
                                  yaxis={'title': 'Loss Function'})
    convergence_fig = go.Figure(data=[trace], layout=convergence_title)

    return html.Div([
        html.H2('Class Grouping in Data:'),
        dbc.Table.from_dataframe(distinct_count_df,
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
        html.H2('Model Parameters & Summary:'),
        dbc.Table.from_dataframe(summary_df,
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
        html.Br(),
        dcc.Graph(id='sgd-convergence-plot', figure=convergence_fig),
        html.H2('Confusion Matrix (Precision & Recall):'),
        dbc.Table.from_dataframe(confusion_df,
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
        html.Br(),
        html.Br()
    ])
def nlcl_model_predict(n_clicks):
    """Plot the entered data point against the test set and model rectangles.

    Reads the class column, prediction input, test split, model and feature
    list from ``db``. No class is predicted here; the entered point is only
    drawn together with the test data and three rectangles taken from
    ``model[0]``, ``model[1]`` and ``model[2]``.

    Args:
        n_clicks: Not used by this function.

    Returns:
        A 2-tuple: ``("", "")`` when there is no prediction input,
        ``(error message, "")`` when the input is invalid, otherwise
        ``("", div)`` where ``div`` wraps the scatter plot.
    """
    c = db.get('nlcl.model_class')
    predict_data = db.get("nlcl.model_prediction_data")
    test_df = db.get('nlcl.data_test')
    model = db.get('nlcl.model_instance')
    var = db.get('nlcl.model_variables')
    expected_values = len(var)

    if predict_data is None:
        return ("", "")
    if len(predict_data.split(',')) != expected_values:
        return (common.error_msg('Enter Valid Prediction Data!!'), "")
    try:
        cols = var + [c]
        feature_vector = get_predict_data_list(predict_data)
        # TODO Team 3 Predict API is not available.
    except Exception as e:
        return (common.error_msg("Exception during prediction: " + str(e)), "")

    # Relabel the test frame with the selected feature and class names.
    test_df.columns = cols
    x_col, y_col = var[0], var[1]

    spec_x, spec_y = get_rect_coordinates(model[0])
    gen_x, gen_y = get_rect_coordinates(model[1])
    opt_x, opt_y = get_rect_coordinates(model[2])

    # One marker trace per class found in the test data.
    graph_data = []
    for clazz in test_df[c].unique():
        members = test_df[test_df[c] == clazz]
        graph_data.append(go.Scatter(
            x=members[x_col],
            y=members[y_col],
            text=members[c],
            mode='markers',
            opacity=0.8,
            marker={
                'size': 15,
                'line': {'width': 0.5, 'color': 'white'}
            },
            name=clazz
        ))

    # The entered point, drawn with a larger marker.
    graph_data.append(go.Scatter(
        x=[feature_vector[0]],
        y=[feature_vector[1]],
        mode='markers',
        opacity=0.8,
        marker={
            'size': 20,
            'line': {'width': 0.5, 'color': 'white'}
        },
        text='Predicted - DataPoint',
        name='Predicted - DataPoint'
    ))

    rectangles = (
        (spec_x, spec_y, 'Specific Rectangle'),
        (opt_x, opt_y, 'Optimal Rectangle'),
        (gen_x, gen_y, 'Generic Rectangle'),
    )
    for xs, ys, label in rectangles:
        graph_data.append(go.Scatter(x=xs, y=ys, text=label, name=label))

    layout = dict(
        title='Boundaries, Predict Data Point & Test Data Set Scatter Plot',
        xaxis={'title': x_col},
        yaxis={'title': y_col},
        margin={'l': 40, 'b': 40},
        legend={'x': 0, 'y': 1},
        hovermode='closest'
    )
    graph = dcc.Graph(
        id='nlcl-x-vs-y-predict',
        figure={'data': graph_data, 'layout': layout}
    )
    return ("", html.Div([graph]))
def selected_file(href):
    """Return a message naming the file and format currently stored in db.

    Args:
        href: Not used by this function.
    """
    name, kind = db.get("file"), db.get("format")
    label = "Selected File: " + name + " Selected Format: " + kind
    return common.msg(label)
def cl_model_predict(n_clicks):
    """Classify the user-entered feature values with the linear classifier.

    Reads the prediction input, the model summary and the model instance
    from ``db`` and stores the prediction under ``'cl.prediction'``.

    Args:
        n_clicks: Not used by this function.

    Returns:
        A 2-tuple. ``("", "")`` when there is no prediction input or no
        model summary yet; ``(error message, "")`` when the number of
        entered values is wrong or the prediction raises; otherwise
        ``(success message, div)`` where ``div`` holds axis dropdowns, a
        "Plot" button and an empty plot container.
    """
    predict_data = db.get("cl.model_prediction_data")
    summary = db.get('cl.model_summary')
    lr_instance = db.get('cl.model_instance')

    # Check for missing inputs before subscripting the summary.
    if predict_data is None or summary is None:
        return ("", "")
    n_var = summary['Total Number of Features in Dataset']
    if len(predict_data.split(',')) != n_var:
        return (common.error_msg('Enter Valid Prediction Data!!'), "")
    try:
        feature_vector = get_predict_data_list(predict_data)
        feature_vector = np.array(feature_vector)
        prediction = lr_instance.predict(feature_vector)
        db.put('cl.prediction', prediction)
    except Exception as e:
        traceback.print_exc()
        return (common.error_msg("Exception during prediction: " + str(e)), "")

    # Every column except the last one is offered as a plot axis.
    df = db.get('cl.data_train')
    axis_columns = [*df.iloc[:, :-1]]

    def axis_dropdown(component_id):
        """Return a single-select dropdown listing the axis columns."""
        return dcc.Dropdown(id=component_id,
                            options=[{
                                'label': col,
                                'value': col
                            } for col in axis_columns],
                            value=None,
                            multi=False)

    div = html.Div([
        html.Div(
            [html.H2("Predicted & Testing Data Scatter Plot")],
            style={
                'width': '100%',
                'display': 'flex',
                'align-items': 'center',
                'justify-content': 'center'
            }),
        dbc.Row([
            dbc.Col([
                dbc.Label("Select X Axis"),
                axis_dropdown('cl-x-axis-predict'),
                html.Br(),
                dbc.Label("Select Y Axis"),
                axis_dropdown('cl-y-axis-predict'),
                html.Br(),
                dbc.Button("Plot",
                           color="primary",
                           id='cl-predict-scatter-plot-button'),
                html.Div([], id="cl-x-axis-predict-do-nothing"),
                html.Div([], id="cl-y-axis-predict-do-nothing")
            ],
                    md=2,
                    style={
                        'margin': '10px',
                        'font-size': '16px'
                    }),
            dbc.Col([], md=9, id="cl-scatter-plot-predict")
        ]),
    ])
    return (common.success_msg('Predicted/Classified Class = ' + prediction),
            div)
def cl_model_train(n_clicks):
    """Split the data, train the linear classifier and render the report.

    Reads the class column, feature list, training percentage, learning
    rate and epoch count from ``db``. The splits, summary and trained
    instance are stored under ``'cl.data_train'``, ``'cl.data_test'``,
    ``'cl.model_summary'`` and ``'cl.model_instance'``.

    Args:
        n_clicks: Not used by this function.

    Returns:
        ``""`` when no parameter is set at all, an error message when the
        training percentage is missing or outside 0-100, when another
        parameter is missing or when training raises; otherwise an
        ``html.Div`` with the class counts, summary, confusion matrix and
        the prediction input controls.
    """
    c = db.get('cl.model_class')
    var = db.get('cl.model_variables')
    train = db.get('cl.model_train')
    lr = db.get('cl.model_lr')
    epoch = db.get('cl.model_epoch')

    if c is None and var is None and train is None and lr is None and epoch is None:
        return ""
    if train is None or train < 0 or train > 100:
        return common.error_msg('Training % should be between 0 - 100 !!')
    if c is None or var is None or lr is None or epoch is None:
        return common.error_msg('Select Proper Model Parameters!!')

    try:
        cols = var + [c]
        df = db.get('cl.data')[cols]

        train_df, test_df = common.split_df(df, c, train)

        # Class counts for the full set, joined with train and test counts.
        distinct_count_df = get_distinct_count_df(df, c, 'Total Count')
        for part, label in ((train_df, 'Training Count'),
                            (test_df, 'Testing Count')):
            counts = get_distinct_count_df(part, c, label)
            distinct_count_df = distinct_count_df.join(
                counts.set_index('Class'), on='Class')

        instanceOfLR, summary = linearClassifier(train_df, test_df,
                                                 len(var), lr, epoch)
        summary['Features'] = str(var)
        summary_df = pd.DataFrame(summary.items(),
                                  columns=['Parameters', 'Value'])

        db.put('cl.data_train', train_df)
        db.put('cl.data_test', test_df)
        db.put('cl.model_summary', summary)
        db.put('cl.model_instance', instanceOfLR)
        confusion_df = get_confusion_matrix(test_df, c, var, instanceOfLR)
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " +
                                str(e))

    def as_table(frame):
        """Render a data frame with the shared table styling."""
        return dbc.Table.from_dataframe(frame,
                                        striped=True,
                                        bordered=True,
                                        hover=True,
                                        style=common.table_style)

    feature_list = ','.join(var)
    return html.Div([
        html.H2('Class Grouping in Data:'),
        as_table(distinct_count_df),
        html.H2('Model Parameters & Summary:'),
        as_table(summary_df),
        html.H2('Confusion Matrix (Precision & Recall):'),
        as_table(confusion_df),
        html.H2('Prediction/Classification:'),
        html.P('Features to be Predicted (comma separated): ' + feature_list,
               style={'font-size': '16px'}),
        dbc.Input(id="cl-prediction-data",
                  placeholder=feature_list,
                  type="text"),
        html.Br(),
        dbc.Button("Predict", color="primary", id='cl-predict'),
        html.Div([], id="cl-prediction"),
        html.Div([], id="cl-predicted-scatter-plot"),
    ])
def nlcl_model_train(n_clicks):
    """Train a model on two selected features and render the training report.

    The class column, feature list and training percentage are read from
    ``db`` under ``nlcl.model_class``, ``nlcl.model_variables`` and
    ``nlcl.model_train``. On success the train/test frames, the summary dict
    and the trained model are written back to ``db`` under
    ``nlcl.data_train``, ``nlcl.data_test``, ``nlcl.model_summary`` and
    ``nlcl.model_instance``.

    Args:
        n_clicks: Not used by the body (presumably the click count of the
            triggering button -- confirm against the callback registration).

    Returns:
        ``""`` when no parameter has been chosen yet, a ``common.error_msg``
        component when a parameter is invalid or training raises, otherwise
        an ``html.Div`` with the class-count table, the summary table, the
        boundary plot and the prediction controls.
    """
    c = db.get('nlcl.model_class')
    var = db.get('nlcl.model_variables')
    train = db.get('nlcl.model_train')

    if c is None and var is None and train is None:
        return ""
    if train is None or train < 0 or train > 100:
        return common.error_msg('Training % should be between 0 - 100 !!')
    # ``var`` may still be None here (only some parameters chosen); guard it
    # so the user gets a message instead of a TypeError from len(None).
    if var is None or len(var) != 2:
        return common.error_msg('Select Two Features!!')
    if c is None:
        return common.error_msg('Select Proper Model Parameters!!')

    try:
        cols = list(var) + [c]
        df = db.get('nlcl.data')[cols]
        train_df, test_df = common.split_df(df, c, train)

        # Count classes while every frame still carries the real class
        # column name ``c``; the rename below replaces it with 'Class'.
        distinct_count_df = get_distinct_count_df(df, c, 'Total Count')
        for part_df, label in ((train_df, 'Training Count'),
                               (test_df, 'Testing Count')):
            part_counts = get_distinct_count_df(part_df, c, label)
            distinct_count_df = distinct_count_df.join(
                part_counts.set_index('Class'), on='Class')

        # The trainer is handed the frame under these fixed column names.
        train_df.columns = ['X1', 'X2', 'Class']
        model = non_separable_train(train_df)
        print(model)

        summary = {
            'Total Training Data': len(train_df),
            'Total Testing Data': len(test_df),
            'Total Number of Features in Dataset': len(var),
            'Model Accuracy %': 'TODO',
            'Features': str(var),
        }
        summary_df = pd.DataFrame(list(summary.items()),
                                  columns=['Parameters', 'Value'])

        db.put('nlcl.data_train', train_df)
        db.put('nlcl.data_test', test_df)
        db.put('nlcl.model_summary', summary)
        db.put('nlcl.model_instance', model)
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " + str(e))

    # Restore the original column names so the plot can use them.
    train_df.columns = cols
    x_col = var[0]
    y_col = var[1]

    graph_data = _nlcl_class_traces(train_df, c, x_col, y_col)
    # Trace order (Specific, Optimal, Generic) fixes the legend order.
    rectangles = (
        ('Specific Rectangle', model[0]),
        ('Optimal Rectangle', model[2]),
        ('Generic Rectangle', model[1]),
    )
    for label, rectangle in rectangles:
        rect_x, rect_y = get_rect_coordinates(rectangle)
        graph_data.append(
            go.Scatter(x=rect_x, y=rect_y, text=label, name=label))

    graph = dcc.Graph(
        id='nlcl-x-vs-y-rectangle',
        figure={
            'data': graph_data,
            'layout': dict(
                title='Boundaries & Train Data Set Scatter Plot',
                xaxis={'title': x_col},
                yaxis={'title': y_col},
                margin={'l': 40, 'b': 40},
                legend={'x': 0, 'y': 1},
                hovermode='closest'
            )
        }
    )

    feature_list = ','.join(var)
    # TODO: no confusion matrix is rendered for this model yet.
    return html.Div([
        html.H2('Class Grouping in Data:'),
        dbc.Table.from_dataframe(distinct_count_df, striped=True,
                                 bordered=True, hover=True,
                                 style=common.table_style),
        html.H2('Model Parameters & Summary:'),
        dbc.Table.from_dataframe(summary_df, striped=True, bordered=True,
                                 hover=True, style=common.table_style),
        html.Br(),
        graph,
        html.H2('Prediction/Classification:'),
        html.P('Features to be Predicted (comma separated): ' + feature_list,
               style={'font-size': '16px'}),
        dbc.Input(id="nlcl-prediction-data", placeholder=feature_list,
                  type="text"),
        html.Br(),
        dbc.Button("Predict", color="primary", id='nlcl-predict'),
        html.Div([], id="nlcl-prediction"),
        html.Div([], id="nlcl-predicted-scatter-plot")
    ])


def _nlcl_class_traces(df, clazz_col, x_col, y_col):
    """Build one marker trace per distinct value found in ``clazz_col``."""
    traces = []
    for clazz in df[clazz_col].unique():
        # Filter once per class rather than once per plotted attribute.
        subset = df[df[clazz_col] == clazz]
        traces.append(go.Scatter(
            x=subset[x_col],
            y=subset[y_col],
            text=subset[clazz_col],
            mode='markers',
            opacity=0.8,
            marker={
                'size': 15,
                'line': {'width': 0.5, 'color': 'white'}
            },
            name=clazz
        ))
    return traces
def selected_file(href):
    """Render the currently selected file together with scatter-plot controls.

    Reads ``file``, ``format``, ``file_separator``, ``file_header`` and
    ``data`` from ``db``.

    Args:
        href: Not used by the body (presumably the page location that
            triggers the callback -- confirm against the registration).

    Returns:
        ``""`` when no file is selected; a two-element list (info message and
        error message) when a file is selected but no data frame is stored;
        otherwise an ``html.Div`` with the file properties, the first ten
        rows and the class / X / Y dropdowns with a plot button.
    """
    file = db.get("file")
    file_format = db.get("format")  # local renamed: don't shadow ``format``
    sep = db.get("file_separator")
    header = db.get("file_header")
    df = db.get("data")

    if file is None:
        return ""

    if df is None:
        # ``str.format`` tolerates a missing (None) format, where ``+``
        # concatenation would raise TypeError.
        return [
            common.msg("Selected File: {} Selected Format: {}".format(
                file, file_format)),
            common.error_msg("Please apply file properties!!")
        ]

    # Same reasoning as above: the separator or format may be None here.
    msg = "File={}  Format={}  Separator={}  Header={}".format(
        file, file_format, sep, header)
    table = dbc.Table.from_dataframe(df.head(10),
                                     striped=True,
                                     bordered=True,
                                     hover=True,
                                     style=common.table_style)
    columns = [*df]

    def column_dropdown(component_id):
        # Single-select dropdown offering every column of the data frame.
        return dcc.Dropdown(id=component_id,
                            options=[{
                                'label': col,
                                'value': col
                            } for col in columns],
                            value=None,
                            multi=False)

    controls = dbc.Col([
        dbc.Label("Select Class"),
        column_dropdown('cl-class'),
        html.Br(),
        dbc.Label("Select X Axis"),
        column_dropdown('cl-x-axis'),
        html.Br(),
        dbc.Label("Select Y Axis"),
        column_dropdown('cl-y-axis'),
        html.Br(),
        dbc.Button("Plot", color="primary", id='cl-scatter-plot-button'),
        html.Div([], id="cl-class-do-nothing"),
        html.Div([], id="cl-x-axis-do-nothing"),
        html.Div([], id="cl-y-axis-do-nothing")
    ],
                       md=2,
                       style={
                           'margin': '10px',
                           'font-size': '16px'
                       })

    return html.Div([
        common.msg(msg), table,
        html.Br(),
        html.Div(
            [common.msg("Scatter Plot")],
            style={
                'width': '100%',
                'display': 'flex',
                'align-items': 'center',
                'justify-content': 'center'
            }),
        dbc.Row([
            controls,
            dbc.Col([], md=9, id="cl-scatter-plot")
        ])
    ])
def nlcl_display_selected_file_scatter_plot(value):
    """Load the chosen cleaned file and render its preview, stats and controls.

    The file name comes from ``value`` or, when that is None, from the
    ``nlcl.file`` entry in ``db``. The chosen name is stored back under
    ``nlcl.file`` and the loaded frame under ``nlcl.data``.

    Args:
        value: Name of the cleaned file, or None to reuse the stored one.

    Returns:
        A ``common.msg`` prompt when no file is available, otherwise an
        ``html.Div`` with the first ten rows, a statistics table (count, mean,
        standard deviation), the scatter-plot controls, the model-properties
        section and an empty container for the trained model.
    """
    db_value = db.get("nlcl.file")
    if value is None:
        if db_value is None:
            return common.msg("Please select a cleaned file to proceed!!")
        value = db_value

    db.put("nlcl.file", value)
    file = value
    path = FileUtils.path('clean', file)
    df = DataUtils.read_csv(path)
    db.put("nlcl.data", df)

    # Pick the rows by label, not by position: describe(include='all') only
    # emits unique/top/freq rows when non-numeric columns are present, so
    # positional slicing mislabels the rows of an all-numeric file.
    stats = df.describe(include='all').reindex(['count', 'mean', 'std'])
    stats = stats.round(5)
    stats.insert(loc=0, column='Statistics',
                 value=['Count', 'Mean', 'Standard Deviation'])

    centered = {
        'width': '100%',
        'display': 'flex',
        'align-items': 'center',
        'justify-content': 'center'
    }
    columns = [*df]

    def column_dropdown(component_id):
        # Single-select dropdown offering every column of the data frame.
        return dcc.Dropdown(
            id=component_id,
            options=[{'label': col, 'value': col} for col in columns],
            value=None,
            multi=False
        )

    controls = dbc.Col([
        dbc.Label("Select Class"),
        column_dropdown('nlcl-class'),
        html.Br(),
        dbc.Label("Select X Axis"),
        column_dropdown('nlcl-x-axis'),
        html.Br(),
        dbc.Label("Select Y Axis"),
        column_dropdown('nlcl-y-axis'),
        html.Br(),
        dbc.Button("Plot", color="primary", id='nlcl-scatter-plot-button'),
        html.Div([], id="nlcl-class-do-nothing"),
        html.Div([], id="nlcl-x-axis-do-nothing"),
        html.Div([], id="nlcl-y-axis-do-nothing")
    ], md=2,
        style={'margin': '10px', 'font-size': '16px'})

    return html.Div([
        common.msg("Selected cleaned file: " + file),
        dbc.Table.from_dataframe(df.head(10), striped=True, bordered=True,
                                 hover=True, style=common.table_style),
        html.Div([html.H3("Data Statistics")], style=centered),
        dbc.Table.from_dataframe(stats, striped=True, bordered=True,
                                 hover=True, style=common.table_style),
        html.Br(),
        html.Div([html.H2("Scatter Plot")], style=dict(centered)),
        dbc.Row([
            controls,
            dbc.Col([], md=9, id="nlcl-scatter-plot")
        ]),
        html.Br(),
        get_nlcl_model_properties_div(df),
        html.Div([], id="nlcl-trained-model", style={'margin': '10px'}),
    ])
# Example #21
# 0
def pca_model_train(n_clicks):
    """Run PCA on the selected variables and render the results.

    The variable list is read from ``db`` under ``pca.model_variables`` and
    the data frame under ``pca.data``; the values of the selected columns are
    passed to ``perform_pca``.

    Args:
        n_clicks: Not used by the body (presumably the click count of the
            triggering button -- confirm against the callback registration).

    Returns:
        ``""`` when no variables are selected, a ``common.error_msg``
        component when ``perform_pca`` (or the data lookup) raises, otherwise
        an ``html.Div`` with the covariance matrix, every eigenvalue /
        eigenvector pair and, when exactly two variables are selected, a
        scatter plot with the eigenvectors drawn from the origin.
    """
    var = db.get('pca.model_variables')
    if var is None:
        return ""

    try:
        df = db.get('pca.data')
        cov_mat, eig_vals, eig_vecs, eig_pairs = perform_pca(
            df[var].values)
        cov_df = pd.DataFrame(cov_mat).round(4)
    except Exception as e:
        traceback.print_exc()
        return common.error_msg("Exception during training model: " +
                                str(e))

    children = [
        html.H2('Covariance Matrix:'),
        dbc.Table.from_dataframe(cov_df,
                                 striped=True,
                                 bordered=True,
                                 hover=True,
                                 style=common.table_style),
    ]

    for eig_val, eig_vec in eig_pairs.items():
        children.append(html.H2('Eigen Values: ' + str(round(eig_val, 4))))
        children.append(html.H2('Eigen Vector: ' + str(eig_vec)))
        children.append(html.Br())

    if len(var) == 2:
        x_col = var[0]
        y_col = var[1]
        xmax = max(df[x_col])

        traces = [
            go.Scatter(x=df[x_col],
                       y=df[y_col],
                       mode='markers',
                       opacity=0.8,
                       marker={
                           'size': 15,
                           'line': {
                               'width': 0.5,
                               'color': 'white'
                           }
                       },
                       name='Data Points'),
        ]

        # Largest eigenvalue first. Traces are built per available pair so a
        # single-entry ``eig_pairs`` no longer leaves the second vector's
        # variables unbound.
        for rank, key in enumerate(sorted(eig_pairs.keys(), reverse=True)):
            vec_x = eig_pairs[key][0]
            vec_y = eig_pairs[key][1]
            line_x = [0, vec_x]
            line_y = [0, vec_y]
            # Only the first vector is extended to the largest x value; a
            # vertical vector has no finite slope, so it is left unextended.
            if rank == 0 and vec_x != 0:
                slope = vec_y / vec_x
                line_x.append(xmax)
                line_y.append(slope * xmax)
            traces.append(
                go.Scatter(x=line_x,
                           y=line_y,
                           mode='lines',
                           opacity=0.8,
                           name='Eigen Vector - V' + str(rank + 1) + ' - ' +
                           str(round(key, 4))))

        graph = dcc.Graph(
            id='pca-x-vs-y',
            figure={
                'data': traces,
                'layout':
                dict(
                    title='Scatter Plot',
                    xaxis={'title': x_col},
                    yaxis={'title': y_col},
                    margin={
                        'l': 40,
                        'b': 40
                    },
                    hovermode='closest')
            })
        children.append(graph)

    return html.Div(children)