Пример #1
0
def test_1():
    """Check that the learned match factor is close to the known 0.82051.

    Mines association rules from the blade data set with very permissive
    support/confidence/lift settings, keeps the top 70% of confidence
    values, and matches a jigsaw query product against the learned rules.
    """
    asset_dir = os.path.dirname(__file__)

    # Learning set: combined blade product data
    train_path = os.path.join(asset_dir, '../assets/bladeCombined.csv')
    store_data, records = get_data(train_path)

    # Sort function/flows of components by confidence via association rules
    conf_results, results = find_associations(
        store_data, records, support=0.0003, confidence=0.01, lift=0.1)

    top_results = get_top_results(conf_results, 0.7)

    # Verification set: known jigsaw product
    query_path = os.path.join(asset_dir, '../assets/jigsawQuery.csv')
    test_data, test_records = get_data(query_path)

    # Compare learned results with the known function/flows
    learned_dict, matched, overmatched, unmatched, match_factor = match(
        top_results, test_records)

    assert np.allclose(0.82051, match_factor)
Пример #2
0
def test1():
    """Find the F1 score by manually holding out product ids for testing.

    Splits product ids 691 and 169 out of the consumer-systems data as the
    verification set, learns from the remainder, and checks that
    precision_recall produces a non-empty learned dictionary with a
    positive F1 score.
    """
    base_dir = os.path.dirname(__file__)
    learn_path = os.path.join(base_dir,
                              '../autofunc/assets/consumer_systems.csv')
    frame = make_df(learn_path)

    # Keep only the top 70% of frequency values
    freq_threshold = 0.7

    # Product id(s) separated from the learning file into the testing set
    holdout_ids = [691, 169]
    verification_df, training_df = split_learning_verification(
        frame, holdout_ids)
    verification_list = df_to_list(verification_df)

    sorted_combos = counter_pandas(training_df)
    top_results = get_top_results(sorted_combos, freq_threshold)

    # Score the learned results against the held-out known function/flows
    learned_dict, matched, overmatched, unmatched, recall, precision, f1 = precision_recall(
        top_results, verification_list)

    assert len(learned_dict) != 0
    assert f1 > 0
def test_1():
    """Verify the top function-flow combination for "screw" is "couple solid"."""
    base_dir = os.path.dirname(__file__)
    data_path = os.path.join(base_dir, '../assets/bladeCombined.csv')

    # Load the blade data and mine association rules with default settings
    store_data, records = get_data(data_path)
    rules, _all_results = find_associations(store_data, records)

    # Keep the top 70% of confidence values and check the best entry
    top = get_top_results(rules, 0.7)
    assert top['screw'][0][0] == 'couple solid'
def test_2():
    """The top 70% of function-flow combinations for "screw" has exactly one entry."""
    base_dir = os.path.dirname(__file__)
    data_path = os.path.join(base_dir, '../assets/bladeCombined.csv')

    # Mine association rules from the blade data with default settings
    store_data, records = get_data(data_path)
    rules, _all_results = find_associations(store_data, records)

    # Thresholding at 70% should leave a single result for "screw"
    top = get_top_results(rules, 0.7)
    assert len(top['screw']) == 1
Пример #5
0
def test_get_top_results():
    """The top 70% of function-flow combinations for "screw" has exactly one entry."""
    base_dir = os.path.dirname(__file__)
    data_file = os.path.join(base_dir, '../assets/bladeCombined.csv')

    # Count and sort function/flow combinations from the blade data
    sorted_combos = count_stuff(data_file)

    # Keep the top 70% of values
    cutoff = 0.7
    top = get_top_results(sorted_combos, cutoff)

    assert len(top['screw']) == 1
def test_1():
    """Automate result finding with probability values.

    Learns from the blade data set, keeps the top 50% of confidence
    values, then builds a functional representation for a known input
    product; "screw" should map to "couple solid" and the unknown
    component "cheese" should be reported as unmatched.
    """
    base_dir = os.path.dirname(__file__)

    # Dataset used for data mining
    mining_path = os.path.join(base_dir, '../assets/bladeCombined.csv')
    sorted_combos = count_stuff(mining_path)

    # Keep the top 50% of confidence values
    top = get_top_results(sorted_combos, 0.5)

    # Known product used for verification
    input_path = os.path.join(base_dir, '../assets/InputExample.csv')

    # Functions and flows for each component, based on the mined data
    results, unmatched = get_func_rep(top, input_path, True)

    assert results['screw'][0][0] == 'couple solid'
    assert 'cheese' in unmatched
Пример #7
0
def test_1():
    """Automate functional representation with frequency values.

    Learns from the consumer-systems data set, keeps the top 50% of
    values, and builds a functional representation for a known input
    product; "screw" should map to "couple solid" and the unknown
    component "cheese" should be reported as unmatched.
    """
    base_dir = os.path.dirname(__file__)

    # Dataset used for data mining
    mining_path = os.path.join(base_dir,
                               '../autofunc/assets/consumer_systems.csv')
    frame = pd.read_csv(mining_path)
    sorted_combos = counter_pandas(frame)

    # Keep the top 50% of confidence values
    top = get_top_results(sorted_combos, 0.5)

    # Known product used for verification
    input_path = os.path.join(base_dir,
                              '../autofunc/assets/InputExample.csv')

    # Functions and flows for each component, based on the mined data
    results, unmatched = get_func_rep(top, input_path, True)

    assert results['screw'][0][0] == 'couple solid'
    assert 'cheese' in unmatched
Пример #8
0
import os.path
import pandas as pd

""" Example showing how to find F1 score using separate file of input components """

# NOTE(review): counter_pandas, get_top_results, df_to_list and
# precision_recall are not imported in this excerpt — presumably they come
# from the autofunc package; confirm the missing imports against the full file.

# Dataset used for data mining
script_dir = os.path.dirname(__file__)
file_to_learn = os.path.join(script_dir, '../autofunc/assets/consumer_systems.csv')

# Learn sorted function/flow combinations from the training data
train_data = pd.read_csv(file_to_learn)
combos_sorted = counter_pandas(train_data)

# Use a threshold to get the top XX% of confidence values
threshold = 0.5
thresh_results = get_top_results(combos_sorted, threshold)

# Use a known product for verification
test_file = os.path.join(script_dir, '../autofunc/assets/jigsawQuery_headers.csv')
test_data = pd.read_csv(test_file)
test_list = df_to_list(test_data)

# Score the learned results against the known function/flows of the test product
learned_dict, matched, overmatched, unmatched, recall, precision, f1 = precision_recall(thresh_results,
                                                                                                    test_list)


# Optional write to file - uncomment and rename to write file
# write_results_from_dict(learned_dict, 'test1.csv')


Пример #9
0
    # Making folds using list comprehension
    # Partition test_ids into consecutive chunks of size n; the last fold may
    # be shorter.  NOTE(review): fragment — test_ids, n, df, bd, bd_df,
    # threshold, precisions and recalls must be defined earlier in the
    # enclosing function, which is outside this excerpt.
    folds = [
        test_ids[i * n:(i + 1) * n]
        for i in range((len(test_ids) + n - 1) // n)
    ]

    for e in folds:
        # Each fold in turn becomes the verification (hold-out) id set.
        verification_ids = e

        ver_df, learn_df = split_learning_verification(df, verification_ids)

        ver_list = df_to_list(ver_df)

        if not bd:
            # Learn from the remaining (non-hold-out) rows only.
            comb_sort, counts, combos = counter_pandas_with_counts(learn_df)
            thresh_results = get_top_results(comb_sort, threshold)

            # Find the F1 score of the verification test by comparing the learned results with the known function/flows
            learned_dict, matched, overmatched, unmatched, recall, precision, f1 = precision_recall(
                thresh_results, ver_list)

        if bd:
            # Learn from the separate bd_df frame instead — presumably a
            # pre-built alternative learning set; confirm against the caller.
            bd_comb_sort = counter_pandas(bd_df)
            bd_thresh_results = get_top_results(bd_comb_sort, threshold)
            learned_dict, matched, overmatched, unmatched, recall, precision, f1 = precision_recall(
                bd_thresh_results, ver_list)

        # Accumulate per-fold scores for later aggregation.
        precisions.append(precision)
        recalls.append(recall)

        print(e)
Пример #10
0
        # Unique component names present in the retained training rows.
        # NOTE(review): fragment — keep_df, keep_ids, num_all_comps, i,
        # comp_ratios, keepers, scatter_keep, test_id, ps_thresh, comb_sort,
        # test_list, save_data and points are defined outside this excerpt.
        train_comps = list(keep_df.comp.unique())

        if train_comps:
            # Fraction of all components covered by the training subset.
            comp_ratio = len(train_comps) / num_all_comps
            comp_ratios.append((len(keep_ids), i, comp_ratio))

        # NOTE(review): when train_comps is empty, comp_ratio here carries the
        # value from a previous iteration (or is undefined on the first pass)
        # — confirm this is intended.
        if comp_ratio > 0.7 and len(keep_ids) < 40:
            keepers.append(keep_ids)

        scatter_keep.append((comp_ratio, len(keep_ids)))

        # Sweep the threshold from 0.10 to 0.95 in steps of 0.05.
        for t in range(10, 100, 5):
            threshold = t / 100
            print(test_id, ' ', ps_thresh, ' ', threshold)

            thresh_results = get_top_results(comb_sort, threshold)

            if not keep_ids:
                # Nothing kept for training: record a zero score.
                f1 = 0
                num_train_comps = 0
            else:
                # Find the F1 score of the verification test by comparing the learned results with the known function/flows
                learned_dict, matched, overmatched, unmatched, recall, precision, f1 = precision_recall(
                    thresh_results, test_list)
                num_train_comps = len(train_comps)

            # Record (test id, pre-selection threshold, sweep threshold,
            # training size, score, component coverage) for later analysis.
            save_data.append((test_id, ps_thresh, threshold, len(keep_ids), f1,
                              num_train_comps / num_all_comps))

            points.append((ps_thresh, threshold, f1))
Пример #11
0
from autofunc.get_match_factor import match
from autofunc.get_top_results import get_top_results
from autofunc.find_associations import find_associations
from autofunc.get_data import get_data
import os.path
""" Example showing how to find the match factor using association rules """

# Dataset used for data mining
script_dir = os.path.dirname(__file__)
file1 = os.path.join(script_dir, '../assets/bladeCombined.csv')

# Convert file to data frame and list
store_data, records = get_data(file1)

# Use Association Rules to sort the functions/flows of components by confidence
conf_results, results = find_associations(store_data, records)

# Use a threshold to get the top XX% of confidence values
thresh_results = get_top_results(conf_results, 0.7)

# Use a known product for verification
test_file = os.path.join(script_dir, '../assets/jigsawQuery.csv')

test_data, test_records = get_data(test_file)

# Find the match factor of the verification test by comparing the learned results with the known function/flows
learned_dict, matched, overmatched, unmatched, match_factor = match(
    thresh_results, test_records)

# Report the score to five decimal places.
print('Match factor = {0:.5f}'.format(match_factor))