Example #1
def parse_s(s):
    fn_data = Name_functions.DS_file(s)
    fn_subset = Name_functions.DS_reduced_ids_DSJ(s)
    x, y = Di(fn_data).split_data(int(s), fn_subset_ids=fn_subset)
    print('\tM^{}_j ... '.format(s), end='', flush=True)
    good_splits = 0

    for i in sorted(x):
        fn_model = Name_functions.model_SJ(s, i)
        c, cc = np.unique(y[i], return_counts=True)

        # Skip splits whose rarest class has fewer than cv * 2 samples,
        # or that contain only a single class.
        if min(cc) < cv * 2:
            continue
        if len(c) <= 1:
            continue

        # Reuse an already trained model; otherwise train and save a new one.
        if not os.path.exists(fn_model):
            generate_model(x[i], y[i], s, i)
        good_splits += 1
    print('Done ({}/{} D^{}_j met requirements)'.format(
        good_splits, len(x), s))
    return good_splits, len(x), 100 * good_splits / len(x)
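The three values returned by parse_s (usable splits, total splits, and the percentage) make it easy to report coverage per parameter value. A hypothetical driver loop, with made-up parameter values purely for illustration, could look like:

# Hypothetical driver, not part of the original project.
for s in ['1', '7', '30']:
    good, total, pct = parse_s(s)
    print('S = {}: {}/{} splits usable ({:.1f}%)'.format(s, good, total, pct))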
Example #2
def parse_ms(s):
    fn_data = Name_functions.DS_file(s)
    x, y, time, case_id = Di(fn_data).get_data(return_identifiers=True,
                                               return_split_values=True)

    print('\tM^{}_j ... '.format(s), end='', flush=True)

    # S predictions
    for i in sorted([int(i) for i in Name_functions.S_J_values(s)],
                    reverse=True):

        if Filefunctions.exists(Name_functions.DSJ_probabilities(s, i)):
            continue

        model_i = Model_Functions.loadModel(Name_functions.model_SJ(s, i))
        model_labels = model_i.classes_.tolist()
        model_end_time = Name_functions.SJ_period_end_time(s, i)

        with open(Name_functions.DSJ_probabilities(s, i), 'w+') as wf:
            for dx, t, idn in zip(x, time, case_id):
                if t < model_end_time:
                    # Skip points from before the model's period end time,
                    # so the model always predates the data point it scores.
                    continue
                model_predictions = model_i.predict_proba(dx.reshape(1, -1))[0]
                # Map the model's class order onto the global label order,
                # using probability 0 for labels this model has never seen.
                actual_predictions = [
                    (0 if label not in model_labels else
                     model_predictions[model_labels.index(label)])
                    for label in all_labels
                ]
                wf.write('{};{};{}\n'.format(
                    idn, t,
                    ';'.join(['{:.4f}'.format(p) for p in actual_predictions])))
    print('Done')

    # Naive predictions
    print('\tM^{}_naive ... '.format(s), end='', flush=True)
    if Filefunctions.exists(Name_functions.DS_probabilities_naive(s)):
        print('Already done')
        return

    model_naive = Model_Functions.loadModel(Name_functions.model_S_naive(s))
    model_naive_labels = model_naive.classes_.tolist()
    model_naive_end_time = Parameters.train_time_naive_stop

    with open(Name_functions.DS_probabilities_naive(s), 'w+') as wf:
        for dx, t, idn in zip(x, time, case_id):
            if t < model_naive_end_time:
                # Skip points from before the naive model's training stop
                # time, so the model always predates the point it scores.
                continue

            model_predictions = model_naive.predict_proba(dx.reshape(1, -1))[0]
            # Map the model's class order onto the global label order,
            # using probability 0 for labels this model has never seen.
            actual_predictions = [
                (0 if label not in model_naive_labels else
                 model_predictions[model_naive_labels.index(label)])
                for label in all_labels
            ]
            wf.write('{};{};{}\n'.format(
                idn, t,
                ';'.join(['{:.4f}'.format(p) for p in actual_predictions])))
    print('Done')
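Both loops above write one ';'-separated row per data point: the case identifier, its timestamp, and one probability per entry of all_labels. A minimal sketch of a reader for these files (the helper name below is an assumption, not part of the original project):

def read_probability_file(path):
    # Hypothetical helper: parses rows of the form
    # "case_id;timestamp;p_1;...;p_k" as written by parse_ms.
    rows = []
    with open(path) as f:
        for line in f:
            idn, t, *probs = line.strip().split(';')
            rows.append((idn, float(t), [float(p) for p in probs]))
    return rows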
Example #3
def generate_model(x, y, s, i):

    # Split the data into an 80/20 train/test split with a fixed random seed
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y.ravel(),
                                                        random_state=0,
                                                        test_size=0.2)

    # Train each candidate classifier and keep the one with the highest test score
    best_model = None
    best_score = -1
    for c in used_models:
        score, model = train_classifier(c, x_train, x_test, y_train, y_test)
        if score > best_score:
            best_score = score
            best_model = model

    # Save the best-scoring model under the file name for this (s, i) split
    Model_Functions.saveModel(best_model, Name_functions.model_SJ(s, i))
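generate_model delegates the actual fitting to train_classifier, which is not shown in these examples. A minimal sketch, assuming used_models holds scikit-learn estimator classes and the score is plain held-out accuracy (both assumptions, not the project's actual implementation):

def train_classifier(classifier_class, x_train, x_test, y_train, y_test):
    # Hypothetical sketch: fit one candidate estimator and report its
    # test-set accuracy so generate_model can keep the best performer.
    model = classifier_class()
    model.fit(x_train, y_train)
    score = model.score(x_test, y_test)  # mean accuracy on the test split
    return score, model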