Example #1
def pre_processing():
    """
    Processing settings and data
    :return: required data for execution
    """

    # Process and print settings
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Resetting random seed
    reset_random_seed(settings)

    # Loading data
    data = load_data(settings)
    param, cache = precompute_minimal(data, settings)

    return settings, data, param, cache
Example #2
def perform_file(fileName):
    settings.dataset = fileName + ".arff"
    print(fileName)

    print("Loading data")
    data = load_data(settings)

    param, cache = precompute_minimal(data, settings)
    mf = MondrianForest(settings, data)

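    # First minibatch: batch fit; later minibatches: evaluate on the incoming
    # ids, append predictions to the CSV, then update the forest online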
    for idx_minibatch in range(settings.n_minibatches):
        train_ids_current_minibatch = data['train_ids_partition']['current'][
            idx_minibatch]

        if idx_minibatch == 0:
            with open(settings.data_path + fileName + '.csv', 'w') as f:
                f.write("target;prediction\n")
            print("Training 0 batch", len(train_ids_current_minibatch))
            # Batch training for first minibatch
            mf.fit(data, train_ids_current_minibatch, settings, param, cache)
        else:
            print('Evaluation on batch', idx_minibatch, 'in', fileName)
            # Evaluate
            weights_prediction = np.ones(
                settings.n_mondrians) * 1.0 / settings.n_mondrians
            results = mf.evaluate_predictions(
                data, data['x_train'][train_ids_current_minibatch],
                data['y_train'][train_ids_current_minibatch], settings, param,
                weights_prediction, True)
            # prediction
            predictions = results[0]['pred_mean']
            real = data['y_train'][train_ids_current_minibatch].flatten()
            # Append one "real,prediction" row per point (open the file once, not per row)
            with open(settings.data_path + fileName + '.csv', 'a') as f:
                for i in range(len(predictions)):
                    f.write("{0},{1}\n".format(real[i], predictions[i]))

            print("Training on next batch ", idx_minibatch,
                  len(train_ids_current_minibatch))
            # Online update
            mf.partial_fit(data, train_ids_current_minibatch, settings, param,
                           cache)
        print("Finished training ...")
Example #3
def main():
    # Import settings from command line
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Resetting random seed
    reset_random_seed(settings)

    # Loading batch data
    batch_type_list = ['office', 'kitchen', 'bookstore']
    #batch_type_list = ['kitchen','office','bathroom','bedroom','bookstore','living_room']

    incremental_type_list = [
        'computer_room', 'home_office', 'office_kitchen', 'classroom'
    ]
    #incremental_type_list = ['study_space','classroom','computer_room','lobby','home_office','office_kitchen','playroom','reception_room','study','dining_room','cafeteria','furniture_store','conference_room','dinette','gym','storage_room','indoor_balcony','laundromat','printer_room','basement','recreation_room']
    training_perc = 0

    data, training_list, test_list = load_type_dataset(settings,
                                                       batch_type_list,
                                                       incremental_type_list,
                                                       training_perc)

    param, cache = precompute_minimal(data, settings)

    mf = MondrianForest(settings, data)

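    # Batch phase: fit the forest on the pre-partitioned batch training ids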
    batch_train_ids = data['train_ids_partition']['batch']
    print '\nBatch training on %d elements...' % (len(batch_train_ids))

    mf.fit(data, batch_train_ids, settings, param, cache)

    print '...batch training done. \n'

    if training_perc > 0:
        incremental_train_ids = data['train_ids_partition']['incremental']
        print 'Incremental training on %d elements...' % (
            len(incremental_train_ids))

        mf.partial_fit(data, incremental_train_ids, settings, param, cache)

        print '...incremental training done. \n'

    # Evaluation
    print 'Evaluation... \n'
    weights_prediction = np.ones(
        settings.n_mondrians) * 1.0 / settings.n_mondrians
    pred_forest_test, metrics_test = \
        mf.evaluate_predictions(data, data['x_test'], data['y_test'], \
        settings, param, weights_prediction, False)
    name_metric = settings.name_metric  # acc or mse
    metric_test = metrics_test[name_metric]
    tree_numleaves = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_numleaves[i_t] = len(tree.leaf_nodes)
    forest_numleaves = np.mean(tree_numleaves)
    f_stats = open(
        '/home/alberto/tesi/mondrianforest/src/results/statistics.txt', 'w')
    print '%s\t\tnum_leaves' % (name_metric)
    f_stats.write(str(name_metric) + ' : ' + str(metric_test))
    f_stats.write('\n')
    f_stats.write('num_leaves : ' + str(forest_numleaves))
    f_stats.write('\n')
    f_stats.write('\n')
    print '%.3f\t\t%.3f' % (metric_test, forest_numleaves)

    print '\nFinal forest stats:'
    f_stats.write('Final forest stats: \n')
    tree_stats = np.zeros((settings.n_mondrians, 2))
    tree_average_depth = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_stats[i_t, -2:] = np.array(
            [len(tree.leaf_nodes),
             len(tree.non_leaf_nodes)])
        tree_average_depth[i_t] = tree.get_average_depth(settings, data)[0]
    print 'mean(num_leaves) = %.1f, mean(num_non_leaves) = %.1f, mean(tree_average_depth) = %.1f' \
            % (np.mean(tree_stats[:, -2]), np.mean(tree_stats[:, -1]), np.mean(tree_average_depth))
    print 'n_train = %d, log_2(n_train) = %.1f, mean(tree_average_depth) = %.1f +- %.1f' \
            % (data['n_train'], np.log2(data['n_train']), np.mean(tree_average_depth), np.std(tree_average_depth))

    f_stats.write('mean(num_leaves) = ' + str(np.mean(tree_stats[:, -2])))
    f_stats.write('  mean(num_non_leaves) = ' +
                  str(np.mean(tree_stats[:, -1])))
    f_stats.write('  mean(tree_average_depth) = ' +
                  str(np.mean(tree_average_depth)) + '\n')

    f_stats.write('n_train = ' + str(data['n_train']))
    f_stats.write('  log_2(n_train) = ' + str(np.log2(data['n_train'])))
    f_stats.write('  mean(tree_average_depth) = ' +
                  str(np.mean(tree_average_depth)) + ' +- ' +
                  str(np.std(tree_average_depth)) + '\n')

    f_stats.write(
        '\n------------------------------------------------------------------\n'
    )

    print '\n...evaluation done.'
    print 'Computing confusion matrices...'
    uf_dir = settings.data_path + '/unary_csv'
    labels_dir = settings.data_path + '/labels_csv'
    cm_res_dir = '../results/cm'
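    # For each test file: load unary features and labels, normalize as during
    # training, predict with the forest, and compute a confusion matrix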
    for file_name in test_list:
        curr_uf_csv = uf_dir + '/' + file_name
        curr_labels_csv = labels_dir + '/' + file_name
        x_df = pd.read_csv(curr_uf_csv, usecols=unary_features)
        y_df = pd.read_csv(curr_labels_csv, dtype=int)
        x_test = x_df.to_numpy()
        y_test = y_df.to_numpy()
        y_test.shape = (y_test.shape[0], )

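        # Re-apply the min-max scaling derived from the loaded dataset,
        # guarding against zero-range (constant) feature columns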
        if settings.normalize_features == 1:
            min_d = np.minimum(np.min(data['x_train'], 0),
                               np.min(data['x_test'], 0))
            max_d = np.maximum(np.max(data['x_train'], 0),
                               np.max(data['x_test'], 0))
            range_d = max_d - min_d
            idx_range_d_small = range_d <= 0.  # find columns where all features are identical
            if data['n_dim'] > 1:
                range_d[idx_range_d_small] = 1e-3  # non-zero value just to prevent division by 0
            elif idx_range_d_small:
                range_d = 1e-3
            x_test -= min_d + 0.
            x_test /= range_d

        cm_weights_prediction = np.ones(
            settings.n_mondrians) * 1.0 / settings.n_mondrians
        cm_pred_forest_test, cm_metrics_test = \
            mf.evaluate_predictions(data, x_test, y_test, \
            settings, param, cm_weights_prediction, False)

        #y_test_pred = get_y_pred(cm_pred_forest_test['pred_prob'])
        y_test_pred = get_label_predictions(cm_pred_forest_test['pred_prob'])

        f_stats.write(str(file_name) + '\n')

        print 'y_test'
        print y_test[:25]

        print 'y_test_pred'
        print y_test_pred[:25]

        f_stats.write('\n y_test     y_mf_pred: \n')
        for x in range(25):
            f_stats.write('#  %-2d      #  %d\n' % (y_test[x], y_test_pred[x]))

        f_stats.write('\n---------------------------------------\n')

        cm = compute_confusion_matrix(y_test, y_test_pred, print_cm=False)
        cm_path = cm_res_dir + '/' + file_name
        #np.savetxt(cm_path, cm, delimiter=",")

        # SAVE PREDICTIONS ON CORRESPONDING PCD
        '''end_idx = file_name.rfind('.')
        input_path = '/home/alberto/tesi/dataset/NYUDV2/trained_semseg_data/clustering/'
        pcd_name = file_name[0:end_idx] + '.pcd'
        print pcd_name
        pcd_path = input_path + pcd_name
        input_cloud = pypcd.PointCloud.from_path(pcd_path)

        point_x_list = input_cloud.pc_data['x']
        point_y_list = input_cloud.pc_data['y']
        point_z_list = input_cloud.pc_data['z']
        cluster_idx_list = input_cloud.pc_data['label']

        new_cloud = input_cloud.pc_data.copy
        new_cloud = input_cloud.pc_data.view(np.float32).reshape(input_cloud.pc_data.shape + (-1,))

        print 'Cluster cloud shape:' 
        print new_cloud.shape

        print 'Cluster label length: %d' % (len(cluster_idx_list))


        for n in range(new_cloud.shape[0]):
            new_cloud[n][0] = point_x_list[n]
            new_cloud[n][1] = point_y_list[n]
            new_cloud[n][2] = point_z_list[n]
            if(cluster_idx_list[n] > 4000):
                new_cloud[n][3] = 0
            else: 
                new_cloud[n][3] = y_test_pred[cluster_idx_list[n]]
        
        #res_pcd = pypcd.make_xyz_rgb_point_cloud(new_cloud)
        res_pcd = pypcd.make_xyz_label_point_cloud(new_cloud)
        output_path = '/home/alberto/tesi/mondrianforest/src/results/pcd/'+pcd_name
        res_pcd.save(output_path)'''

    print '...computation done.\n END.'
    f_stats.close()
Example #4
import pprint as pp

from mondrianforest_utils import load_data, reset_random_seed, precompute_minimal
from mondrianforest import process_command_line, MondrianForest

settings = process_command_line()
print 'Current settings:'
pp.pprint(vars(settings))

# Resetting random seed
reset_random_seed(settings)

# Loading data
data = load_data(settings)
print "Data: ", data
print type(settings)

param, cache = precompute_minimal(data, settings)

mf = MondrianForest(settings, data)

print(data)

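# The data loader pre-partitions training ids into minibatches; take the first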
train_ids_current_minibatch = data['train_ids_partition']['current'][0]
print train_ids_current_minibatch.shape

print "First batch train on 5 data points"
mf.fit(data, train_ids_current_minibatch[0:5], settings, param, cache)
print mf.forest[0].counts
print "\nNow the extension\n"
mf.partial_fit(data, train_ids_current_minibatch[5:10], settings, param, cache)
print mf.forest[0].counts
Example #5
    def __init__(self, image, region):

        self.window = max(region.width, region.height) * 2

        left = max(region.x, 0)
        top = max(region.y, 0)

        right = min(region.x + region.width, image.shape[1] - 1)
        bottom = min(region.y + region.height, image.shape[0] - 1)

        if (right - left) % 2 != 0:
            right -= 1
        if (bottom - top) % 2 != 0:
            bottom -= 1

        self.template = image[top:bottom, left:right]
        self.position = (region.x + region.width / 2, region.y + region.height / 2)
        self.size = (region.width, region.height)
        self.old_img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        self.hsv = np.zeros_like(image)
        self.hsv[..., 1] = 255

        self.pos = np.array([image[top:bottom, left:right].copy().tolist()])
        self.neg = np.array([])

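        # Positive samples: jitter the target window by up to 10% of its size,
        # keeping the shifted window inside the image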
        vred = [(0, 0)]
        while True:
            l = random.randint(-int((right - left) * 0.1), int((right - left) * 0.1))
            t = random.randint(-int((bottom - top) * 0.1), int((bottom - top) * 0.1))
            if l + left >= 0 and l + right < image.shape[1] and t + top >= 0 and t + bottom < image.shape[0]:
                vred += [(l, t)]
            if len(vred) > 5:
                break
        self.pos = np.array(
            [np.array(self.old_img[top + t:bottom + t, left + l:right + l].copy().tolist()) for (l, t) in vred])
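        # Negative samples: random windows whose center lies more than one
        # window size from the target in both axes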
        vred = []
        infloop = 0
        while True:
            l = random.randint((right - left) / 2, image.shape[1] - (right - left) / 2 - 1)
            t = random.randint((bottom - top) / 2, image.shape[0] - (bottom - top) / 2 - 1)
            if abs(l - left - (right - left) / 2) > (right - left) and abs(t - top - (bottom - top) / 2) > (
                        bottom - top):
                vred += [(l, t)]
            if len(vred) > 5 or infloop > 10000:
                break
            infloop+=1
        self.neg = np.array(
            [np.array(self.old_img[t - (bottom - top) / 2:t + (bottom - top) / 2,
                      l - (right - left) / 2:l + (right - left) / 2].copy().tolist()) for (l, t) in vred])

        print("tu je image")
        stevec = 1
        for i in self.pos:
            cv2.imwrite("file" + str(stevec) + ".png", i)
            stevec += 1


        # Hard-coded MondrianForest settings (renamed to avoid shadowing the built-in `set`)
        settings_dict = {'optype': 'class', 'verbose': 1, 'draw_mondrian': 0, 'perf_dataset_keys': ['train', 'test'],
                         'data_path': '../../process_data/', 'dataset': 'toy-mf', 'tag': '', 'alpha': 0, 'bagging': 0,
                         'select_features': 0, 'smooth_hierarchically': 1, 'normalize_features': 1, 'min_samples_split': 2,
                         'save': 0, 'discount_factor': 10, 'op_dir': 'results', 'init_id': 1, 'store_every': 0,
                         'perf_store_keys': ['pred_prob'], 'perf_metrics_keys': ['log_prob', 'acc'], 'budget': -1.0,
                         'n_mondrians': 10, 'debug': 0, 'n_minibatches': 2, 'name_metric': 'acc', 'budget_to_use': inf}
        self.settings = Map(settings_dict)
        reset_random_seed(self.settings)
        stevec = -30
        for i in self.neg:
            cv2.imwrite("file" + str(stevec) + ".png", i)
            stevec -= 1
        x_trainp = [x.flatten().tolist() for x in self.pos]
        x_trainn = [x.flatten().tolist() for x in self.neg]
        x_train = x_trainp + x_trainn
        print(len(x_train[0]))
        self.data = {'n_dim': 1,
                     'x_train': array(x_train),
                     'y_train': array(np.ones(len(self.pos)).astype(int).tolist() +
                                      np.zeros(len(self.neg)).astype(int).tolist()),
                     'x_test': array([x_train[5]]),
                     'y_test': array([]),
                     'is_sparse': False,
                     'n_train': len(x_train),
                     'n_class': 2,
                     'n_test': 0}

        self.param, self.cache = precompute_minimal(self.data, self.settings)

        self.mf = MondrianForest(self.settings, self.data)

        for idx_minibatch in range(self.settings.n_minibatches):
            #train_ids_current_minibatch = self.data['train_ids_partition']['current'][idx_minibatch]
            if idx_minibatch == 0:
                # Batch training for first minibatch
                self.mf.fit(self.data, array(range(0, len(x_train)/2)), self.settings, self.param, self.cache)
            else:
                # Online update
                self.mf.partial_fit(self.data, array(range(len(x_train)/2, len(x_train))), self.settings, self.param, self.cache)
                print("updatalo je")

        weights_prediction = np.ones(self.settings.n_mondrians) * 1.0 / self.settings.n_mondrians
        #train_ids_cumulative = self.data['train_ids_partition']['cumulative'][idx_minibatch]
        print(self.mf.evaluate_predictions(self.data, array([x_train[5]]), [1], \
                    self.settings, self.param, weights_prediction, False))
Example #6
import pprint as pp

from mondrianforest_utils import load_data, reset_random_seed, precompute_minimal
from mondrianforest import process_command_line, MondrianForest

PLOT = False

settings = process_command_line()
print 'Current settings:'
pp.pprint(vars(settings))

# Resetting random seed
reset_random_seed(settings)

# Loading data
data = load_data(settings)

param, cache = precompute_minimal(data, settings)

mf = MondrianForest(settings, data)

print '\nminibatch\tmetric_train\tmetric_test\tnum_leaves'

for idx_minibatch in range(settings.n_minibatches):
    train_ids_current_minibatch = data['train_ids_partition']['current'][idx_minibatch]
    if idx_minibatch == 0:
        # Batch training for first minibatch
        mf.fit(data, train_ids_current_minibatch, settings, param, cache)
    else:
        # Online update
        mf.partial_fit(data, train_ids_current_minibatch, settings, param, cache)

    # Evaluate
Example #7
    def __init__(self, img, region):
        image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        image = image.astype(int) / 4
        h, s, v = cv2.split(image)
        s += 45
        v += 109
        image = h  # cv2.merge((h, s, v))
        self.window = max(region.width, region.height) * 2

        left = int(max(region.x, 0))
        top = int(max(region.y, 0))

        right = int(min(region.x + region.width, image.shape[1] - 1))
        bottom = int(min(region.y + region.height, image.shape[0] - 1))

        if (right - left) % 2 != 0:
            right -= 1
        if (bottom - top) % 2 != 0:
            bottom -= 1

        self.template = image[int(top):int(bottom), int(left):int(right)]
        self.position = (region.x + region.width / 2, region.y + region.height / 2)
        self.size = (region.width, region.height)
        self.old_img = image
        self.pos = np.array([image[int(top):int(bottom), int(left):int(right)].copy().tolist()])
        self.neg = np.array([])

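        # Positive samples: jitter the target window by up to 3% of its size,
        # keeping the shifted window inside the image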
        vred = [(0, 0)]
        infloop = 0
        while True:
            l = random.randint(-int((right - left) * 0.03), int((right - left) * 0.03))
            t = random.randint(-int((bottom - top) * 0.03), int((bottom - top) * 0.03))
            if l + left >= 0 and l + right < image.shape[1] and t + top >= 0 and t + bottom < image.shape[0]:
                vred += [(l, t)]
            if len(vred) > 15 or infloop > 10000:
                break
            infloop+=1

        self.pos = np.array(
            [np.array(self.old_img[top + int(t2):bottom + int(t2), left + int(l2):right + int(l2)].copy().tolist()) for (l2, t2) in vred])
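        # Negative samples: random windows displaced from the target by more
        # than one window size along at least one axis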
        vred = []
        infloop = 0
        while True:
            l = random.randint(int((right - left) / 2), int(image.shape[1] - (right - left) / 2 - 1))
            t = random.randint(int((bottom - top) / 2), int(image.shape[0] - (bottom - top) / 2 - 1))
            if abs(l - left - (right - left) / 2) > (right - left) or abs(t - top - (bottom - top) / 2) > (
                        bottom - top):
                vred += [(l, t)]
            if len(vred) > 45 or infloop > 10000:
                break
            infloop+=1
        self.neg = np.array(
            [np.array(self.old_img[int(t2) - (bottom - top) / 2:int(t2) + (bottom - top) / 2,
                      int(l2) - (right - left) / 2:int(l2) + (right - left) / 2].copy().tolist()) for (l2, t2) in vred])
        print("pred update")
        print("neg" + str(len(self.neg)))
        print("pos" + str(len(self.pos)))

        # Hard-coded MondrianForest settings (renamed to avoid shadowing the built-in `set`)
        settings_dict = {'optype': 'class', 'verbose': 1, 'draw_mondrian': 0, 'perf_dataset_keys': ['train', 'test'],
                         'data_path': '../../process_data/', 'dataset': 'toy-mf', 'tag': '', 'alpha': 0, 'bagging': 0,
                         'select_features': 0, 'smooth_hierarchically': 1, 'normalize_features': 1, 'min_samples_split': 2,
                         'save': 0, 'discount_factor': 10, 'op_dir': 'results', 'init_id': 1, 'store_every': 0,
                         'perf_store_keys': ['pred_prob'], 'perf_metrics_keys': ['log_prob', 'acc'], 'budget': -1.0,
                         'n_mondrians': 10, 'debug': 0, 'n_minibatches': 1, 'name_metric': 'acc', 'budget_to_use': inf}
        self.settings = Map(settings_dict)
        reset_random_seed(self.settings)

        x_trainp = np.array([np.bincount(x.flatten().astype(int),minlength=45) for x in self.pos])
        x_trainn = np.array([np.bincount(x.flatten().astype(int),minlength=45) for x in self.neg])
        x_train = np.append(x_trainp, x_trainn, axis=0)

        self.data = {'n_dim': 1,
                     'x_train': array(x_train),
                     'y_train': array(np.ones(len(self.pos)).astype(int).tolist() +
                                      np.zeros(len(self.neg)).astype(int).tolist()),
                     'x_test': array([x_train[5]]),
                     'y_test': array([]),
                     'is_sparse': False,
                     'n_train': len(x_train),
                     'n_class': 2,
                     'n_test': 0}

        self.param, self.cache = precompute_minimal(self.data, self.settings)
        self.mf = MondrianForest(self.settings, self.data)
        self.mf.fit(self.data, array(range(0, len(x_train))), self.settings, self.param, self.cache)
        print("kones")
Example #8
def main():
    # Import settings from command line
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))

    # Resetting random seed
    reset_random_seed(settings)

    # Loading data
    data = load_dataset(settings)

    param, cache = precompute_minimal(data,settings)

    mf = MondrianForest(settings, data)

    print '\nminibatch\tmetric_train\tmetric_test\tnum_leaves'

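    # First minibatch trains from scratch; later ones extend the trees online,
    # then both the cumulative train set and the test set are scored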
    for idx_minibatch in range(settings.n_minibatches):
        train_ids_current_minibatch = data['train_ids_partition']['current'][idx_minibatch]
        if idx_minibatch == 0:
            # Batch training for first minibatch
            mf.fit(data, train_ids_current_minibatch, settings, param, cache)
        else:
            # Online update
            mf.partial_fit(data, train_ids_current_minibatch, settings, param, cache)

        # Evaluate
        weights_prediction = np.ones(settings.n_mondrians) * 1.0 / settings.n_mondrians
        train_ids_cumulative = data['train_ids_partition']['cumulative'][idx_minibatch]
        pred_forest_train, metrics_train = \
            mf.evaluate_predictions(data, data['x_train'][train_ids_cumulative, :], \
            data['y_train'][train_ids_cumulative], \
            settings, param, weights_prediction, False)
        pred_forest_test, metrics_test = \
            mf.evaluate_predictions(data, data['x_test'], data['y_test'], \
            settings, param, weights_prediction, False)
        name_metric = settings.name_metric     # acc or mse
        metric_train = metrics_train[name_metric]
        metric_test = metrics_test[name_metric]
        tree_numleaves = np.zeros(settings.n_mondrians)
        for i_t, tree in enumerate(mf.forest):
            tree_numleaves[i_t] = len(tree.leaf_nodes)
        forest_numleaves = np.mean(tree_numleaves)
        print '%9d\t%.3f\t\t%.3f\t\t%.3f' % (idx_minibatch, metric_train, metric_test, forest_numleaves)
        print 'length of y_test'
        print data['y_test'].shape

        y_test_pred = get_y_pred(pred_forest_test['pred_prob'])
        print 'length of y_test_pred:'
        print y_test_pred.shape

        for x in range(len(y_test_pred)):
            print 'label: %d mf prediction: %d' % (data['y_test'][x], y_test_pred[x])

        cm = confusion_matrix(data['y_test'], y_test_pred)
        
        # Show confusion matrix in a separate window
        plt.matshow(cm)
        plt.title('Confusion matrix')
        plt.colorbar()
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.show()

    print '\nFinal forest stats:'
    tree_stats = np.zeros((settings.n_mondrians, 2))
    tree_average_depth = np.zeros(settings.n_mondrians)
    for i_t, tree in enumerate(mf.forest):
        tree_stats[i_t, -2:] = np.array([len(tree.leaf_nodes), len(tree.non_leaf_nodes)])
        tree_average_depth[i_t] = tree.get_average_depth(settings, data)[0]
    print 'mean(num_leaves) = %.1f, mean(num_non_leaves) = %.1f, mean(tree_average_depth) = %.1f' \
            % (np.mean(tree_stats[:, -2]), np.mean(tree_stats[:, -1]), np.mean(tree_average_depth))
    print 'n_train = %d, log_2(n_train) = %.1f, mean(tree_average_depth) = %.1f +- %.1f' \
            % (data['n_train'], np.log2(data['n_train']), np.mean(tree_average_depth), np.std(tree_average_depth))
Example #9
    def __init__(self, img, region):
        image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        image = image.astype(int) / 4
        h, s, v = cv2.split(image)
        s += 45
        v += 109
        image = h  # cv2.merge((h, s, v))
        self.window = max(region.width, region.height) * 2

        left = int(max(region.x, 0))
        top = int(max(region.y, 0))

        right = int(min(region.x + region.width, image.shape[1] - 1))
        bottom = int(min(region.y + region.height, image.shape[0] - 1))

        if (right - left) % 2 != 0:
            right -= 1
        if (bottom - top) % 2 != 0:
            bottom -= 1

        self.template = image[int(top):int(bottom), int(left):int(right)]
        self.position = (region.x + region.width / 2,
                         region.y + region.height / 2)
        self.size = (region.width, region.height)
        self.old_img = image
        self.pos = np.array([
            image[int(top):int(bottom),
                  int(left):int(right)].copy().tolist()
        ])
        self.neg = np.array([])
        self.st_neg = 0

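        # Positive samples: jitter the target window by up to 3% of its size,
        # keeping the shifted window inside the image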
        vred = [(0, 0)]
        infloop = 0
        while True:
            l = random.randint(-int((right - left) * 0.03),
                               int((right - left) * 0.03))
            t = random.randint(-int((bottom - top) * 0.03),
                               int((bottom - top) * 0.03))
            if l + left >= 0 and l + right < image.shape[
                    1] and t + top >= 0 and t + bottom < image.shape[0]:
                vred += [(l, t)]
            if len(vred) > 15 or infloop > 10000:
                break
            infloop += 1

        self.pos = np.array([
            np.array(self.old_img[top + int(t2):bottom + int(t2), left +
                                  int(l2):right + int(l2)].copy().tolist())
            for (l2, t2) in vred
        ])
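        # Negative samples: random windows displaced from the target by more
        # than one window size along at least one axis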
        vred = []
        infloop = 0
        while True:
            l = random.randint(int((right - left) / 2),
                               int(image.shape[1] - (right - left) / 2 - 1))
            t = random.randint(int((bottom - top) / 2),
                               int(image.shape[0] - (bottom - top) / 2 - 1))
            if abs(l - left - (right - left) / 2) > (
                    right - left) or abs(t - top -
                                         (bottom - top) / 2) > (bottom - top):
                vred += [(l, t)]
            if len(vred) > 45 or infloop > 10000:
                break
            infloop += 1
        self.neg = np.array([
            np.array(self.old_img[int(t2) - (bottom - top) / 2:int(t2) +
                                  (bottom - top) / 2,
                                  int(l2) - (right - left) / 2:int(l2) +
                                  (right - left) / 2].copy().tolist())
            for (l2, t2) in vred
        ])
        print("pred update")
        print("neg" + str(len(self.neg)))
        print("pos" + str(len(self.pos)))

        # Hard-coded MondrianForest settings (renamed to avoid shadowing the built-in `set`)
        settings_dict = {
            'optype': 'class',
            'verbose': 1,
            'draw_mondrian': 0,
            'perf_dataset_keys': ['train', 'test'],
            'data_path': '../../process_data/',
            'dataset': 'toy-mf',
            'tag': '',
            'alpha': 0,
            'bagging': 0,
            'select_features': 0,
            'smooth_hierarchically': 1,
            'normalize_features': 1,
            'min_samples_split': 2,
            'save': 0,
            'discount_factor': 10,
            'op_dir': 'results',
            'init_id': 1,
            'store_every': 0,
            'perf_store_keys': ['pred_prob'],
            'perf_metrics_keys': ['log_prob', 'acc'],
            'budget': -1.0,
            'n_mondrians': 10,
            'debug': 0,
            'n_minibatches': 1,
            'name_metric': 'acc',
            'budget_to_use': inf
        }
        self.settings = Map(settings_dict)
        reset_random_seed(self.settings)

        x_trainp = np.array([
            np.bincount(x.flatten().astype(int), minlength=45)
            for x in self.pos
        ])
        x_trainn = np.array([
            np.bincount(x.flatten().astype(int), minlength=45)
            for x in self.neg
        ])
        if len(self.neg) > 0:
            x_train = np.append(x_trainp, x_trainn, axis=0)
            self.st_neg = 1
        else:
            x_train = x_trainp
            self.st_neg = 0

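        # Assemble the dataset dict MondrianForest expects: histogram features,
        # label 1 for positive patches, 0 for negatives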
        self.data = {'n_dim': 1,
                     'x_train': array(x_train),
                     'y_train': array(np.ones(len(self.pos)).astype(int).tolist() +
                                      np.zeros(len(self.neg)).astype(int).tolist()),
                     'x_test': array([x_train[5]]),
                     'y_test': array([]),
                     'is_sparse': False,
                     'n_train': len(x_train),
                     'n_class': 2,
                     'n_test': 0}

        self.param, self.cache = precompute_minimal(self.data, self.settings)
        self.mf = MondrianForest(self.settings, self.data)
        self.mf.fit(self.data, array(range(0, len(x_train))), self.settings,
                    self.param, self.cache)