示例#1
0
def train_all_models(classifier='KNN', output_path='accuracy.csv'):
    """Reset the accuracy log and train a model for every category.

    Truncates *output_path* and writes the CSV header row, then, for each
    category in the module-level ``categories`` iterable, regenerates the
    feature data and trains a classifier.

    Args:
        classifier: Classifier name forwarded to ``train_for_category``.
            Defaults to ``'KNN'``, the previously hard-coded choice.
        output_path: Path of the CSV accuracy log to (re)initialize.
            Defaults to ``'accuracy.csv'``, the previously hard-coded path.
    """
    # Write the header once, up front; 'w' mode discards any old log.
    with open(output_path, 'w') as f:
        f.write('category,tp,fp,tn,fn,precision,recall,fscore')
        f.write('\n')

    for c in categories:
        # generate() rebuilds the per-category feature data consumed by
        # train_for_category (see the sibling functions in this file).
        generate(c)
        train_for_category(c, classifier)
示例#2
0
def run_T_detection_trials(trials=100, size=1000, cprivacy=0.1, nprivacy=1, distinct=10, cskew=0.33, nskew=0.33):
    """Estimate the detection-failure rate over repeated random trials.

    Each trial draws a fresh synthetic dataset via ``generate`` and counts
    the trial as a failure when the first and third components of the
    ``distinct_count`` result agree.

    Returns:
        The fraction of failing trials as a float in [0, 1].
    """
    failure_count = sum(
        int(res[0] == res[2])
        for res in (
            distinct_count(
                generate(S=size, p=cprivacy, b=nprivacy, N=distinct, z1=cskew, z2=nskew)
            )
            for _ in range(trials)
        )
    )
    # Float division regardless of interpreter version, as in the original.
    return float(failure_count) / trials
示例#3
0
def train_with_optimized_feature_size(category, classifier):
    """Search feature sizes 10-13 for the best F1 score, then retrain.

    For each candidate feature size, the size is persisted to
    'num_of_top_words_as_feature.pickle' (read downstream by ``generate``
    / ``train_for_category``), the data is regenerated, and a model is
    trained.  The best-scoring size is written back, the model is
    retrained with it, and both the model and the chosen size are
    pickled to disk.

    Args:
        category: Category name; also used as the model file name.
        classifier: Classifier name forwarded to ``train_for_category``.

    Returns:
        (clf, max_score): the retrained classifier and the best F1 score
        observed during the search.
    """
    clfs_with_score = {}

    for i in range(10, 14):
        # Persist the candidate size where the rest of the pipeline reads it.
        with open('num_of_top_words_as_feature.pickle', 'w+b') as f:
            pickle.dump(i, f)

        generate(category)
        f1_score, cm, clf = train_for_category(category, classifier)
        clfs_with_score[i] = f1_score

    max_score = 0
    best_feature_size = 10
    clf = None

    # Manual arg-max so each new best candidate is traced, as before.
    for i in clfs_with_score:
        if max_score < clfs_with_score[i]:
            print(i)
            best_feature_size = i
            max_score = clfs_with_score[i]

    print(category + " " + str(max_score) + " " + str(best_feature_size))

    with open('num_of_top_words_as_feature.pickle', 'w+b') as f:
        pickle.dump(best_feature_size, f)

    # Trained again so that all csv files are updated for the correct
    # feature size.
    generate(category)
    f1_score, cm, clf = train_for_category(category, classifier)
    print(f1_score)

    with open('model/' + category, 'w+b') as f:
        pickle.dump(clf, f)

    # BUG FIX: this file was previously opened in text mode ('w'); pickle
    # requires a binary stream (fails on Python 3, corrupts on Windows).
    # Now matches the 'w+b' mode used for every other pickle write above.
    with open('data/' + category + '/feature_size.pickle', 'w+b') as f:
        pickle.dump(best_feature_size, f)

    return clf, max_score
示例#4
0
def run_T_trials(
    trials=100,
    size=1000,
    cprivacy=0.1,
    nprivacy=1,
    distinct=10,
    selectivity=0.5,
    errorrate=0.21,
    cskew=0.33,
    nskew=0.33,
):
    """Compare clean vs. corrupted-predicate queries over random trials.

    Each trial generates a synthetic dataset, builds a "true" predicate
    covering the top (1 - selectivity) fraction of the distinct values,
    and a corrupted predicate formed by unioning the true predicate with
    roughly errorrate * distinct randomly chosen values.  sum/count/avg
    queries are evaluated under both predicates and selected result
    components are collected.

    Returns:
        (sumr, countr, avgr): three lists of 5-tuples, one entry per
        trial where count_cresult[2] > 0.  Per the appends below, each
        tuple is (clean[0], corrupt[1], clean[2], corrupt[2], clean[1]).
    """

    sumr = []
    countr = []
    avgr = []

    for t in range(0, trials):
        # print "Running Trial ", t
        dataset = generate(S=size, p=cprivacy, b=nprivacy, N=distinct, z1=cskew, z2=nskew)
        # "True" predicate: the top selectivity-fraction of value indices.
        truepred = range(
            int(round((1 - selectivity) * distinct)), distinct
        )  # numpy.random.choice(range(0,distinct),size=int(round(selectivity*distinct)),replace=False)
        corruptpred = []

        # Only inject corruption when at least one value would be affected.
        if int(round(errorrate * distinct)) >= 1:
            corruptpred = numpy.random.choice(range(0, distinct), size=int(round(errorrate * distinct)), replace=False)

        # Corrupted predicate = true predicate plus the random extra values.
        corruptpred = list(set(truepred).union(set(corruptpred)))

        # Query results under the clean (true) predicate.
        sum_cresult = sumq(dataset, predicate=truepred, p=cprivacy, b=nprivacy)
        count_cresult = countq(dataset, predicate=truepred, p=cprivacy)
        avg_cresult = avgq(dataset, predicate=truepred, p=cprivacy, b=nprivacy)

        # Query results under the corrupted predicate.
        sum_dresult = sumq(dataset, predicate=corruptpred, p=cprivacy, b=nprivacy)
        count_dresult = countq(dataset, predicate=corruptpred, p=cprivacy)
        avg_dresult = avgq(dataset, predicate=corruptpred, p=cprivacy, b=nprivacy)

        if count_cresult[2] > 0:
            sumr.append((sum_cresult[0], sum_dresult[1], sum_cresult[2], sum_dresult[2], sum_cresult[1]))
            countr.append((count_cresult[0], count_dresult[1], count_cresult[2], count_dresult[2], count_cresult[1]))
            # NOTE(review): the last element below is count_cresult[1],
            # breaking the pattern of the two lines above (which end with
            # sum_cresult[1] / count_cresult[1] respectively; avg_cresult[1]
            # would be expected here) — possible copy-paste bug, confirm intent.
            avgr.append((avg_cresult[0], avg_dresult[1], avg_cresult[2], avg_dresult[2], count_cresult[1]))

    return (sumr, countr, avgr)
示例#5
0
                                                 batch_size=100,
                                                 max_anchor_size=None,
                                                 stop_on_first=False,
                                                 desired_label=None,
                                                 beam_size=4)
                print('Anchor: %s' % (' AND '.join(exp.names())))

                coverage_tab = None
                precision_tab = None
                # If the dataset is one of the 3 generated, pick the anchors informations and compute the coverage and precision on test dataset
                if 'generate' in dataset_name:
                    anchors = exp.names()
                    _, _, pick_anchors_informations = generate(
                        dataset.test[idx].reshape(1, -1)[0][0],
                        dataset.test[idx].reshape(1, -1)[0][1],
                        anchors,
                        blackbox=c,
                        X=X,
                        Y=Y)
                    coverage_tab = compute_coverage(dataset, exp, anchors,
                                                    pick_anchors_informations)
                    precision_tab = compute_precision(
                        dataset, dataset.labels_test,
                        predict_fn(dataset.test[idx].reshape(1, -1))[0], exp,
                        anchors, pick_anchors_informations)
                """
                print('Anchor: %s' % (' AND '.join(exp.names())))
                print('Precision: %.2f' % exp.precision())
                print('Coverage: %.2f' % exp.coverage())
                """
    choice = input("Generate dataset? [y/n]: ")

# ---- Dataset-generation configuration and entry point (top-level script) ----
# NOTE(review): several names used below (CameraPose, flower_mesh, stem_mesh,
# center_mesh, flower_type, no_stem_flowers, choice, scene, cam_node, r,
# flags, alpha, beta, camera, flower_pose, varying_light_intensities,
# at_x/at_y/at_z, distance, file_name) are defined earlier in the file,
# outside this excerpt — verify against the full script.
iterations = 5
mode = CameraPose.NONE
csvfile = './outputs/flowers_dataset.csv'
csvMode = 'w' # 'a' => append / 'w' => write (overwrites existing csv file)
meshes = [(flower_mesh, 'flower'), (stem_mesh, 'stem'), (center_mesh, 'center')]
skip_default_view = False
salt = '001'
offset = 0
# Per-sub-mesh skip flags keyed by mesh name; everything rendered by default.
skip_sub_mesh = {name: False for (mesh, name) in meshes}

# Flowers without a stem should not render the stem sub-mesh.
if flower_type in no_stem_flowers:
    skip_sub_mesh['stem'] = True

# 'y' is the answer to the "Generate dataset? [y/n]" prompt earlier in the file.
if choice == 'y':

    # Remove camera that was used as preview from the scene
    scene.remove_node(cam_node)

    generate(
        scene=scene, renderer=r, flags=flags, alpha=alpha, beta=beta, camera=camera, 
        iterations=iterations, flower_pose=flower_pose, meshes=meshes, 
        light_intensitites = varying_light_intensities,
        skip_sub_mesh= skip_sub_mesh, at_x=at_x, at_y=at_y, at_z=at_z,
        skip_default_view=skip_default_view, offset=offset, 
        distance=distance, mode=mode, csvfile=csvfile, csvMode=csvMode,
        flower_name=file_name, salt=salt
        )

print("Finished")