Exemplo n.º 1
0
def getprobability(object, grasps):
    """Estimate force-closure probabilities for the grasps that have features.

    Args:
        object: indexable record; ``object[0]`` is a feature file name whose
            ``"_features.txt"`` suffix is stripped to form the object key,
            ``object[1]`` is the mesh file path and ``object[2]`` the SDF
            file path.
        grasps: grasps to evaluate on the object.

    Returns:
        The Beta-Bernoulli posterior means computed from the last model
        snapshot of a uniform-allocation run over the candidates. Grasps
        whose feature vector is ``None`` are not among the candidates.
    """
    mesh_path = object[1]
    sdf_path = object[2]
    mesh = of.ObjFile(mesh_path).read()
    sdf = sf.SdfFile(sdf_path).read()
    graspable = go.GraspableObject3D(
        sdf,
        mesh=mesh,
        key=object[0].replace("_features.txt", ""),
        model_name=mesh_path)
    config = ec.ExperimentConfig("cfg/correlated.yaml")
    np.random.seed(100)

    # Bandit parameters. Only the brute-force iteration count and the
    # snapshot rate are consumed below; the others are read but unused.
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [tc.MaxIterTerminationCondition(max_iter)]

    # Random variables shared by every candidate.
    graspable_rv = pfc.GraspableObjectGaussianPose(graspable, config)
    friction_rv = scipy.stats.norm(config['friction_coef'],
                                   config['sigma_mu'])

    # Feature vectors, one per grasp and in the same order as `grasps`.
    extractor = ff.GraspableFeatureExtractor(graspable, config)
    feature_vectors = extractor.compute_all_features(grasps)

    candidates = []
    for grasp, feature_vector in zip(grasps, feature_vectors):
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        closure_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, friction_rv,
                                        config)
        if feature_vector is not None:
            closure_rv.set_features(feature_vector)
            candidates.append(closure_rv)

    def feature_map(rv):
        return rv.features

    # Built exactly as before, although nothing below reads them.
    nn = kernels.KDTree(phi=feature_map)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=feature_map)
    objective = objectives.RandomBinaryObjective()

    # Uniform allocation over all candidates to estimate the true values.
    sampler = das.UniformAllocationMean(objective, candidates)
    stop_rule = tc.MaxIterTerminationCondition(brute_force_iter)
    outcome = sampler.solve(termination_condition=stop_rule,
                            snapshot_rate=snapshot_rate)
    final_model = outcome.models[-1]
    return models.BetaBernoulliModel.beta_mean(final_model.alphas,
                                               final_model.betas)
Exemplo n.º 2
0
 def solve(
         self,
         termination_condition=tc.MaxIterTerminationCondition(DEF_MAX_ITER),
         snapshot_rate=1):
     """Run discrete maximization over every candidate held by this solver.

     Both arguments are forwarded unchanged to ``discrete_maximize``, and
     its return value is passed straight back to the caller.
     """
     all_candidates = self.candidates_
     outcome = self.discrete_maximize(all_candidates, termination_condition,
                                      snapshot_rate)
     return outcome
Exemplo n.º 3
0
def test_grad_ascent():
    """Check gradient ascent on a random least-squares problem.

    Builds a seeded random system ``(A, b)``, solves the normal equations
    for a reference solution, runs ``UnconstrainedGradientAscent`` with a
    backtracking line search for 100 iterations and asserts that the best
    objective value found is within 1e-2 of the reference value.

    Raises:
        numpy.linalg.LinAlgError: if the normal equations cannot be solved.
            Without a reference solution the comparison is meaningless, so
            the error is logged and propagated.
    """
    np.random.seed(100)

    # init vars
    x_dim = 10
    b_dim = 5
    A = np.random.rand(b_dim, x_dim)
    b = np.random.rand(b_dim)
    x_0 = np.random.rand(x_dim)
    objective = objectives.MinimizationObjective(
        objectives.LeastSquaresObjective(A, b))

    # get actual solution via the normal equations
    try:
        true_best_x = np.linalg.solve(A.T.dot(A), A.T.dot(b))
    except np.linalg.LinAlgError:
        logging.error('A transpose A ws not invertible!')
        # Re-raise: continuing would hit a NameError on `true_best_x`
        # and hide the real cause of the failure.
        raise
    true_best_f = objective(true_best_x)

    # run gradient ascent
    step_policy = BacktrackingLSPolicy()
    optimizer = UnconstrainedGradientAscent(objective, step_policy)
    result = optimizer.solve(
        termination_condition=tc.MaxIterTerminationCondition(100),
        snapshot_rate=1,
        start_x=x_0,
        true_x=true_best_x)

    assert (np.abs(np.linalg.norm(result.best_f - true_best_f)) < 1e-2)

    logging.info('Val at true best x: %f' % (true_best_f))
    logging.info('Val at estimated best x: %f' % (result.best_f))
    plot_value_vs_time_gradient(result, true_best_f,
                                'Least squares grad ascent')
Exemplo n.º 4
0
def test_gittins_indices_98(num_candidates=NUM_CANDIDATES):
    """Run ``GittinsIndex98`` on random Bernoulli arms and plot the outcome.

    Args:
        num_candidates: number of Bernoulli candidates to generate.

    Returns:
        The result object returned by the solver's ``solve`` call.
    """
    # get candidates (seeded so the success probabilities are reproducible)
    np.random.seed(1000)
    pred_means = np.random.rand(num_candidates)
    candidates = [BernoulliRV(mean) for mean in pred_means]

    # get true maximum
    true_max = np.max(pred_means)
    true_max_indices = np.where(pred_means == true_max)

    # solve using Gittins indices
    obj = objectives.RandomBinaryObjective()
    solver = GittinsIndex98(obj, candidates)
    # NOTE: needs more iters on this problem, hence the factor of 10
    result = solver.solve(
        termination_condition=tc.MaxIterTerminationCondition(MAX_ITERS * 10),
        snapshot_rate=SNAPSHOT_RATE)

    # check result (not guaranteed to work in finite iterations but whatever)
    assert (len(result.best_candidates) == 1)
    assert (np.abs(result.best_candidates[0].p() - true_max) < 1e-4)
    logging.info('Gittins Indices test passed!')
    logging.info('Took %f sec' % (result.total_time))
    # np.where returns a tuple of index arrays; format the first matching
    # index as a scalar instead of handing the whole array to '%d'.
    logging.info('Best index %d' % (true_max_indices[0][0]))

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Gittins Indices 98')

    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Gittins Indices 98')

    return result
Exemplo n.º 5
0
def test_gaussian_ucb(num_candidates=NUM_CANDIDATES):
    """Run ``GaussianUCBSampling`` on random Bernoulli arms and plot the outcome.

    Args:
        num_candidates: number of Bernoulli candidates to generate.

    Returns:
        The result object returned by the solver's ``solve`` call.
    """
    # get candidates (seeded so the success probabilities are reproducible)
    np.random.seed(1000)
    actual_means = np.random.rand(num_candidates)
    candidates = [BernoulliRV(m) for m in actual_means]

    # get true maximum
    true_max = np.max(actual_means)
    true_max_indices = np.where(actual_means == true_max)

    # solve using Gaussian UCB sampling
    obj = objectives.RandomBinaryObjective()
    ucb = GaussianUCBSampling(obj, candidates)
    result = ucb.solve(
        termination_condition=tc.MaxIterTerminationCondition(MAX_ITERS),
        snapshot_rate=SNAPSHOT_RATE)

    # check result (not guaranteed to work in finite iterations but whatever)
    assert len(result.best_candidates) == 1
    assert np.abs(result.best_candidates[0].p() - true_max) < 1e-4
    logging.info('Gaussian UCB sampling test passed!')
    logging.info('Took %f sec' % (result.total_time))
    # np.where returns a tuple of index arrays; format the first matching
    # index as a scalar instead of handing the whole array to '%d'.
    logging.info('Best index %d' % (true_max_indices[0][0]))

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Gaussian UCB Sampling')

    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Gaussian UCB Sampling')

    return result
Exemplo n.º 6
0
 def solve(
         self,
         termination_condition=tc.MaxIterTerminationCondition(DEF_MAX_ITER),
         snapshot_rate=1):
     """Solve for the maximal / minimal point.

     This body is a stub: it ignores both arguments and returns ``None``.
     Presumably concrete solvers override it -- confirm against the
     enclosing class, which is not visible here.
     """
     return None
Exemplo n.º 7
0
def run_ua_on(obj, config):
    """Sample grasps on ``obj`` and score them all with uniform allocation.

    Args:
        obj: graspable object handed to the grasp samplers and to the
            pose random variable.
        config: mapping-like configuration; the keys read here are
            ``grasp_sampler``, ``check_collisions``, ``min_num_grasps``
            (antipodal branch only), ``friction_coef``, ``sigma_mu`` and
            ``bandit_brute_force_iter``.

    Returns:
        The result of ``UniformAllocationMean.solve`` over one
        force-closure random variable per sampled grasp.
    """
    # Timestamp taken at the start of sampling; it is not read afterwards.
    sample_start = time.clock()

    antipodal_requested = config['grasp_sampler'] == 'antipodal'
    if not antipodal_requested:
        logging.info('Using Gaussian grasp sampling')
        sampler = gs.GaussianGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'])
    else:
        logging.info('Using antipodal grasp sampling')
        sampler = ags.AntipodalGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'])

        # Top up with Gaussian grasps when the antipodal sampler falls short.
        num_missing = config['min_num_grasps'] - len(grasps)
        if num_missing > 0:
            fallback_sampler = gs.GaussianGraspSampler(config)
            extra_grasps = fallback_sampler.generate_grasps(
                obj,
                target_num_grasps=num_missing,
                check_collisions=config['check_collisions'])
            grasps.extend(extra_grasps)

    # One force-closure random variable per grasp.
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    friction_rv = scipy.stats.norm(config['friction_coef'],
                                   config['sigma_mu'])
    candidates = []
    for index, grasp in enumerate(grasps):
        logging.info('Adding grasp %d candidate' % (index))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        candidates.append(
            pfc.ForceClosureRV(grasp_rv, graspable_rv, friction_rv, config))
    logging.info('%d candidates', len(candidates))

    # The iteration budget scales with the candidate count; using it as the
    # snapshot rate too means snapshots are taken only at the first and
    # last iteration.
    brute_force_iter = config['bandit_brute_force_iter'] * len(candidates)
    snapshot_rate = brute_force_iter
    objective = objectives.RandomBinaryObjective()

    allocator = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    bandit_start = time.clock()
    stop_rule = tc.MaxIterTerminationCondition(brute_force_iter)
    ua_result = allocator.solve(termination_condition=stop_rule,
                                snapshot_rate=snapshot_rate)
    bandit_duration = time.clock() - bandit_start
    logging.info('Uniform allocation (%d iters) took %f sec' %
                 (brute_force_iter, bandit_duration))

    return ua_result
Exemplo n.º 8
0
def test_stochastic_grad_ascent_logistic():
    """Compare stochastic gradient ascent with scikit-learn on logistic data.

    Draws two seeded 2-D Gaussian clusters (labels 0 and 1), prepends a
    constant column of ones, fits scikit-learn's ``LogisticRegression``
    without an intercept as the reference, then runs
    ``UnconstrainedGradientAscent`` for 100 iterations on the stochastic
    cross-entropy objective and logs/plots both outcomes. Nothing is
    asserted and nothing is returned.
    """
    np.random.seed(100)

    # Cluster parameters; the draw order matters for reproducibility.
    num_from_zero = 100
    zero_mean = np.random.rand(2)
    zero_cov = 3e-2 * np.diag(np.random.rand(2))
    num_from_one = 100
    one_mean = np.random.rand(2)
    one_cov = 3e-2 * np.diag(np.random.rand(2))
    from_zero = np.random.multivariate_normal(zero_mean, zero_cov,
                                              num_from_zero)
    from_one = np.random.multivariate_normal(one_mean, one_cov, num_from_one)

    # Design matrix: a column of ones followed by the stacked samples.
    ones_column = np.ones((num_from_zero + num_from_one, 1))
    samples = np.vstack([from_zero, from_one])
    X = np.hstack([ones_column, samples])
    y = np.hstack([np.zeros(num_from_zero), np.ones(num_from_one)])

    cross_entropy = objectives.StochasticLogisticCrossEntropyObjective(
        X, y, batch_size=50)
    objective = objectives.MinimizationObjective(cross_entropy)

    # get ground truth solution from scikit-learn
    from sklearn.linear_model import LogisticRegression
    solver = LogisticRegression(fit_intercept=False)
    solver.fit(X, y)
    solution = solver.coef_.squeeze()
    # Count of correctly classified samples; computed but not reported.
    solver_score = sum(solver.predict(X) == y)
    true_best_f = objective(solution)

    logging.info('scikit-learn solution: %s', solution)
    logging.info('scikit-learn score: %f', true_best_f)
    logging.info('scikit-learn iterations: %d', solver.n_iter_)

    # run gradient ascent
    step_policy = DecayingStepPolicy(1)
    optimizer = UnconstrainedGradientAscent(objective, step_policy)
    stop_rule = tc.MaxIterTerminationCondition(100)
    initial_x = np.random.rand(3)
    result = optimizer.solve(termination_condition=stop_rule,
                             snapshot_rate=1,
                             start_x=initial_x,
                             true_x=solution)
    logging.info('our solution: %s', result.best_x)
    logging.info('our score: %f', result.best_f)

    logging.info('Val at true best x: %f' % (true_best_f))
    logging.info('Val at estimated best x: %f' % (result.best_f))
    plot_value_vs_time_gradient(result, true_best_f, 'Logistic SGA')
Exemplo n.º 9
0
def test_correlated_thompson_sampling(num_candidates=NUM_CANDIDATES,
                                      sig=1.0,
                                      eps=0.5):
    """Run ``CorrelatedThompsonSampling`` on evenly spaced Bernoulli arms.

    Args:
        num_candidates: number of Bernoulli candidates; their success
            probabilities are evenly spaced over [0, 1].
        sig: ``sigma`` passed to the squared-exponential kernel.
        eps: ``tolerance`` passed to the correlated sampler.

    Returns:
        The result object returned by the solver's ``solve`` call.
    """
    # get candidates
    actual_means = np.linspace(0.0, 1.0, num=num_candidates)
    candidates = [BernoulliRV(m) for m in actual_means]

    # get true maximum
    true_max = np.max(actual_means)
    true_max_indices = np.where(actual_means == true_max)

    # constructing nearest neighbor and kernel; the feature of an arm is
    # its success probability rounded to two decimals
    def phi(bern):
        return np.array([round(bern.p(), 2)])

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=sig, phi=phi)

    # solve using correlated Thompson sampling
    obj = objectives.RandomBinaryObjective()
    ts = CorrelatedThompsonSampling(obj, candidates, nn, kernel, tolerance=eps)
    result = ts.solve(
        termination_condition=tc.MaxIterTerminationCondition(MAX_ITERS),
        snapshot_rate=SNAPSHOT_RATE)

    # check result (not guaranteed to work in finite iterations but whatever)
    assert len(result.best_candidates) == 1
    assert np.abs(result.best_candidates[0].p() - true_max) < 1e-4
    logging.info('Correlated Thompson sampling test passed!')
    logging.info('Took %f sec' % (result.total_time))
    # np.where returns a tuple of index arrays; format the first matching
    # index as a scalar instead of handing the whole array to '%d'.
    logging.info('Best index %d' % (true_max_indices[0][0]))

    info = u' (σ=%.1f, ɛ=%.3f)' % (sig, eps)

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Correlated Thompson Sampling' +
              info)

    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Correlated Thompson Sampling' +
              info)

    return result
Exemplo n.º 10
0
    def top_K_solve(
            self,
            K,
            termination_condition=tc.MaxIterTerminationCondition(DEF_MAX_ITER),
            snapshot_rate=1):
        """Solve for the top K maximal / minimal points.

        The candidates are split into bins (a single bin holding all of
        them when ``K == 1``, otherwise whatever ``self.partition(K)``
        returns) and ``discrete_maximize`` is run on the first ``K`` bins.

        Returns:
            A list with one ``discrete_maximize`` result per bin, in bin
            order.
        """
        bins = [self.candidates_] if K == 1 else self.partition(K)
        return [
            self.discrete_maximize(bins[bin_index], termination_condition,
                                   snapshot_rate)
            for bin_index in range(K)
        ]
Exemplo n.º 11
0
def test_thompson_sampling(num_candidates=NUM_CANDIDATES, random=False):
    """Run ``ThompsonSampling`` on Bernoulli arms and plot the outcome.

    Args:
        num_candidates: number of Bernoulli candidates to generate.
        random: when true the success probabilities are drawn uniformly at
            random (seeded); otherwise they are evenly spaced over [0, 1].

    Returns:
        The result object returned by the solver's ``solve`` call.
    """
    # get candidates
    np.random.seed(1000)
    if random:
        pred_means = np.random.rand(num_candidates)
    else:
        pred_means = np.linspace(0.0, 1.0, num=num_candidates)
    candidates = [BernoulliRV(mean) for mean in pred_means]

    # get true maximum
    true_max = np.max(pred_means)
    true_max_indices = np.where(pred_means == true_max)

    # solve using Thompson sampling
    obj = objectives.RandomBinaryObjective()
    ts = ThompsonSampling(obj, candidates)

    result = ts.solve(
        termination_condition=tc.MaxIterTerminationCondition(MAX_ITERS),
        snapshot_rate=SNAPSHOT_RATE)

    # check result (not guaranteed to work in finite iterations but whatever)
    assert (len(result.best_candidates) == 1)
    assert (np.abs(result.best_candidates[0].p() - true_max) < 1e-4)
    logging.info('Thompson sampling test passed!')
    logging.info('Took %f sec' % (result.total_time))
    # np.where returns a tuple of index arrays; format the first matching
    # index as a scalar instead of handing the whole array to '%d'.
    logging.info('Best index %d' % (true_max_indices[0][0]))

    # visualize result
    plot_num_pulls(result)
    plt.title('Observations Per Variable for Thompson Sampling')

    plot_value_vs_time(result, candidates, true_max)
    plt.title('P(Success) versus Iterations for Thompson Sampling')

    return result
Exemplo n.º 12
0
def expected_quality(grasp_rv, graspable_rv, params_rv, quality_config):
    """
    Estimate grasp quality under the given random variables by sampling.

    A single ``QuasiStaticGraspQualityRV`` is built from the arguments and
    sampled with Gaussian uniform allocation for
    ``quality_config['num_quality_samples']`` iterations.

    Returns:
        A 2-tuple taken from the final model snapshot: the first entry of
        its ``means`` and the first entry of its ``sample_vars``.
    """
    # the lone candidate: the quality random variable itself
    quality_rv = QuasiStaticGraspQualityRV(grasp_rv, graspable_rv,
                                           params_rv, quality_config)

    # sampling parameters
    snapshot_rate = quality_config['sampling_snapshot_rate']
    num_samples = quality_config['num_quality_samples']

    # brute force with uniform allocation
    objective = objectives.RandomContinuousObjective()
    allocator = das.GaussianUniformAllocationMean(objective, [quality_rv])
    stop_rule = tc.MaxIterTerminationCondition(num_samples)
    outcome = allocator.solve(termination_condition=stop_rule,
                              snapshot_rate=snapshot_rate)

    # read the estimate off the last snapshot
    last_model = outcome.models[-1]
    quality_means = last_model.means
    quality_vars = last_model.sample_vars
    return quality_means[0], quality_vars[0]
        
Exemplo n.º 13
0
def label_pfc(obj, dataset, output_dir, config):
    """Label an object with grasps according to probability of force closure.

    Samples antipodal grasps on ``obj``, partitions them, runs Thompson
    sampling on every partition to pick its best grasp(s), expands each
    winner with ``grasp.transform``, prunes the expanded grasps with the
    OpenRAVE grasp checker and writes the survivors with their predicted
    qualities to ``<output_dir>/<obj.key>.json``.

    Args:
        obj: graspable object; ``obj.tf`` and ``obj.key`` are read here.
        dataset: not used in this function body.
        output_dir: directory that receives the JSON file.
        config: mapping-like configuration; keys read here are
            ``bandit_max_iter``, ``bandit_confidence``,
            ``bandit_snapshot_rate``, ``friction_coef``, ``sigma_mu``,
            ``grasp_theta_res``, ``vis_grasps`` and (when visualizing)
            ``vis_delay``.

    Returns:
        None; the output is the JSON file.
    """
    # sample initial antipodal grasps
    start = time.clock()
    sampler = ags.AntipodalGraspSampler(config)

    start_time = time.clock()
    # alpha_thresh and rho_thresh are returned by the sampler but unused here
    grasps, alpha_thresh, rho_thresh = sampler.generate_grasps(obj, vis=False)
    end_time = time.clock()
    duration = end_time - start_time
    logging.info('Antipodal grasp candidate generation took %f sec' %
                 (duration))

    # partition grasps
    grasp_partitions = pfc.space_partition_grasps(grasps, config)

    # bandit params
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    # both conditions are combined with OrTerminationCondition below
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
        tc.ConfidenceTerminationCondition(confidence)
    ]

    # run bandits on each partition
    object_grasps = []
    grasp_qualities = []
    i = 0
    for grasp_partition in grasp_partitions:
        logging.info('Finding highest quality grasp in partition %d' % (i))
        # create random variables
        graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
        f_rv = scipy.stats.norm(
            config['friction_coef'],
            config['sigma_mu'])  # friction gaussian random variable
        candidates = []

        for grasp in grasp_partition:
            grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
            candidates.append(
                pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config))

        # run bandits
        objective = objectives.RandomBinaryObjective()
        ts = das.ThompsonSampling(objective, candidates)
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)
        # keep the winning grasps and their predicted means in step with
        # each other; they are paired by index further down
        object_grasps.extend([c.grasp for c in ts_result.best_candidates])
        grasp_qualities.extend(list(ts_result.best_pred_means))
        i = i + 1

    stop = time.clock()
    logging.info('Took %d sec' % (stop - start))

    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])
    i = 0
    if config['vis_grasps']:
        delay = config['vis_delay']

    for grasp in object_grasps:
        print 'Grasp', i
        rotated_grasps = grasp.transform(obj.tf, theta_res)
        rotated_grasps = grasp_checker.prune_grasps_in_collision(
            obj,
            rotated_grasps,
            auto_step=True,
            close_fingers=False,
            delay=delay)
        pr2_grasps.extend(rotated_grasps)
        # every surviving rotated grasp inherits the quality of its source
        pr2_grasp_qualities.extend([grasp_qualities[i]] * len(rotated_grasps))
        i = i + 1

    logging.info('Num grasps: %d' % (len(pr2_grasps)))

    # save grasps locally :( Due to problems with sudo
    grasp_filename = os.path.join(output_dir, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([
            pr2_grasps[i].to_json(quality=pr2_grasp_qualities[i])
            for i in range(len(pr2_grasps))
        ], f)
Exemplo n.º 14
0
def _ua_vs_ts_normalized_rewards(pfcs, results):
    """Return one row per trial of the predicted-best arm's value, normalized.

    For every trial, each model snapshot's ``best_pred_ind`` is looked up in
    that trial's value array and divided by the array's maximum.
    """
    rewards = np.zeros([len(results), len(results[0].iters)])
    for row, (pfc_values, result) in enumerate(zip(pfcs, results)):
        best_pfc = np.max(pfc_values)
        pred_values = np.array(
            [pfc_values[m.best_pred_ind] for m in result.models])
        rewards[row, :] = pred_values / best_pfc
    return rewards


def _ua_vs_ts_plot_rewards(ua_iters, ua_reward, ts_iters, ts_reward, title,
                           line_width, font_size):
    """Plot the two average reward curves against iteration in a new figure."""
    plt.figure()
    plt.plot(ua_iters,
             ua_reward,
             c=u'b',
             linewidth=line_width,
             label='Uniform Allocation')
    plt.plot(ts_iters,
             ts_reward,
             c=u'g',
             linewidth=line_width,
             label='Thompson Sampling')

    # the x range follows the Thompson sampling run
    plt.xlim(0, np.max(ts_iters))
    plt.ylim(0.5, 1)
    plt.xlabel('Iteration', fontsize=font_size)
    plt.ylabel('Normalized Probability of Force Closure', fontsize=font_size)
    plt.title(title, fontsize=font_size)

    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, loc='lower right')


def test_ua_vs_thompson(num_trials=20,
                        num_candidates=1000,
                        brute_iters=50000,
                        max_iters=5000,
                        snapshot_rate=20):
    """Compare uniform allocation with Thompson sampling over several trials.

    Each trial draws fresh Bernoulli arms, runs ``UniformAllocationMean``
    for ``brute_iters`` iterations and ``ThompsonSampling`` for
    ``max_iters`` iterations, and records the results. Afterwards the
    normalized reward per snapshot is averaged over trials, once against
    the true arm probabilities and once against the brute-force estimates,
    and three figures are shown (histogram of arm probabilities plus the
    two reward comparisons).

    Args:
        num_trials: number of independent trials.
        num_candidates: number of arms per trial.
        brute_iters: iteration budget of the uniform-allocation run.
        max_iters: iteration budget of the Thompson-sampling run.
        snapshot_rate: snapshot rate passed to both solvers.

    Returns:
        A tuple ``(ua_results, ts_results)`` holding the per-trial solver
        results. (Previously the function ended with ``return result``,
        an undefined name, so it could never return a value.)
    """
    # get candidates
    np.random.seed(1000)
    prior_dist = 'gaussian'

    # iterate through trials
    ua_results = []
    ts_results = []
    true_pfcs = []
    est_pfcs = []
    for trial in range(num_trials):
        logging.info('Trial %d' % (trial))

        # generate random candidates
        if prior_dist == 'gaussian':
            true_pfc = scipy.stats.norm.rvs(loc=0.5,
                                            scale=0.1,
                                            size=num_candidates)
            # clamp the draws into the valid probability range
            true_pfc[true_pfc < 0] = 0
            true_pfc[true_pfc > 1] = 1
        else:
            true_pfc = np.random.rand(num_candidates)

        # built without reusing the trial index as the loop variable
        candidates = [BernoulliRV(p) for p in true_pfc]

        # get true maximum
        true_max = np.max(true_pfc)
        true_max_indices = np.where(true_pfc == true_max)

        # solve using uniform allocation, then Thompson sampling
        obj = objectives.RandomBinaryObjective()

        ua = UniformAllocationMean(obj, candidates)
        ua_result = ua.solve(
            termination_condition=tc.MaxIterTerminationCondition(brute_iters),
            snapshot_rate=snapshot_rate)

        ts = ThompsonSampling(obj, candidates)
        ts_result = ts.solve(
            termination_condition=tc.MaxIterTerminationCondition(max_iters),
            snapshot_rate=snapshot_rate)

        # check result (not guaranteed to work in finite iterations but whatever)
        logging.info('UA took %f sec' % (ua_result.total_time))
        logging.info('UA best index %d' % (true_max_indices[0][0]))

        logging.info('TS took %f sec' % (ts_result.total_time))
        logging.info('TS best index %d' % (true_max_indices[0][0]))

        # the last uniform-allocation snapshot serves as the estimate
        brute_model = ua_result.models[-1]

        true_pfcs.append(true_pfc)
        est_pfcs.append(
            models.BetaBernoulliModel.beta_mean(brute_model.alphas,
                                                brute_model.betas))
        ua_results.append(ua_result)
        ts_results.append(ts_result)

    # aggregate results wrt truth
    all_ua_norm_rewards = _ua_vs_ts_normalized_rewards(true_pfcs, ua_results)
    all_ts_norm_rewards = _ua_vs_ts_normalized_rewards(true_pfcs, ts_results)

    # aggregate results wrt est
    all_ua_norm_est_rewards = _ua_vs_ts_normalized_rewards(est_pfcs,
                                                           ua_results)
    all_ts_norm_est_rewards = _ua_vs_ts_normalized_rewards(est_pfcs,
                                                           ts_results)

    # params
    line_width = 2.5
    font_size = 15

    # histogram of all arms across all trials
    all_true_pfcs = np.concatenate(true_pfcs)

    num_bins = 100
    bin_edges = np.linspace(0, 1, num_bins + 1)
    plt.figure()
    plt.hist(all_true_pfcs, bin_edges)
    plt.xlabel('Probability of Success', fontsize=font_size)
    plt.ylabel('Num Grasps', fontsize=font_size)
    plt.title('Histogram of Grasps by Probability of Success',
              fontsize=font_size)

    # visualize result
    _ua_vs_ts_plot_rewards(ua_results[0].iters,
                           np.mean(all_ua_norm_rewards, axis=0),
                           ts_results[0].iters,
                           np.mean(all_ts_norm_rewards, axis=0),
                           'Avg Normalized PFC vs Iteration', line_width,
                           font_size)

    # visualize est result
    _ua_vs_ts_plot_rewards(ua_results[0].iters,
                           np.mean(all_ua_norm_est_rewards, axis=0),
                           ts_results[0].iters,
                           np.mean(all_ts_norm_est_rewards, axis=0),
                           'Avg Estimated Normalized PFC vs Iteration',
                           line_width, font_size)

    plt.show()

    return ua_results, ts_results
Exemplo n.º 15
0
    def discrete_maximize(self,
                          candidates,
                          termination_condition=tc.MaxIterTerminationCondition(
                              solvers.DEF_MAX_ITER),
                          snapshot_rate=1):
        """
        Maximize a function over a discrete set of candidates by repeatedly
        asking the selection policy for the next candidate, evaluating the
        objective on it and updating the model with the outcome.

        A snapshot (elapsed time, iteration, chosen index, observed value,
        model snapshot) is recorded whenever the iteration counter is a
        multiple of ``snapshot_rate`` and once more after the loop ends.

        Raises:
            ValueError: if ``candidates`` is empty or the model is not a
                ``models.DiscreteModel``.

        Returns:
            An ``AdaptiveSamplingResult`` built from the model's maximal
            prediction, the total runtime and the recorded snapshots.
        """
        # check input
        if len(candidates) == 0:
            raise ValueError('No candidates specified')

        model_is_discrete = isinstance(self.model_, models.DiscreteModel)
        if not model_is_discrete:
            logging.error('Illegal model specified')
            raise ValueError(
                'Illegitimate model used in DiscreteAdaptiveSampler')

        # init vars
        num_candidates = len(candidates)
        self.reset_model(candidates)  # update model with new candidates

        # logging buffers
        times, iters = [], []
        iter_indices, iter_vals, iter_models = [], [], []
        start_time = time.clock()

        def record(iteration, index, value):
            # append one snapshot row to every logging buffer
            times.append(time.clock() - start_time)
            iters.append(iteration)
            iter_indices.append(index)
            iter_vals.append(value)
            iter_models.append(self.model_.snapshot())

        k = 0  # cur iter
        cur_val = 0
        while True:
            # get next point to sample
            next_ind = self.selection_policy_.choose_next()

            # evaluate the function at the given point (can be nondeterministic)
            prev_val = cur_val
            cur_val = self.objective_.evaluate(candidates[next_ind])

            # snapshot the model before it absorbs the new observation
            if (k % snapshot_rate) == 0:
                record(k, next_ind, cur_val)

            # update the model (e.g. posterior update, grasp pruning)
            self.model_.update(next_ind, cur_val)

            # check termination condition
            finished = termination_condition(k,
                                             cur_val=cur_val,
                                             prev_val=prev_val,
                                             model=self.model_)
            k = k + 1
            if finished:
                break

        # log final values
        record(k, next_ind, cur_val)

        # log total runtime
        total_duration = time.clock() - start_time

        # collect the model's best predictions and return
        best_indices, best_pred_means, best_pred_vars = (
            self.model_.max_prediction())
        best_candidates = [
            best_indices[i] for i in range(best_indices.shape[0])
        ]
        return AdaptiveSamplingResult(best_candidates, best_pred_means,
                                      best_pred_vars, total_duration, times,
                                      iters, iter_indices, iter_vals,
                                      iter_models)
Exemplo n.º 16
0
def label_correlated(obj, chunk, dest, config, plot=False):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits.

    Loads the object's grasps and their features from ``chunk``, builds one
    force-closure random variable per grasp that has features, and runs
    three samplers over the same candidates: uniform allocation, Thompson
    sampling and correlated Thompson sampling. The best grasps of the plain
    Thompson-sampling run are expanded with ``grasp.transform`` and written
    with their predicted qualities to ``<dest>/<obj.key>.json``.

    Args:
        obj: graspable object; ``obj.key`` and ``obj.tf`` are read here.
        chunk: data source providing ``load_grasps`` and ``name``.
        dest: directory that receives the JSON file.
        config: mapping-like experiment configuration.
        plot: not used in this function body.

    Returns:
        ``None`` when no grasps are loaded for the object; otherwise a
        ``BanditCorrelatedExperimentResult`` holding the normalized reward
        curves of the three samplers, the pre-computed grasp qualities, the
        uniform-allocation iteration list and the kernel matrix.
    """
    bandit_start = time.clock()

    # fixed seed so repeated runs draw the same samples
    np.random.seed(100)

    # load grasps from database
    sample_start = time.clock()
    grasps = chunk.load_grasps(obj.key)
    sample_end = time.clock()
    sample_duration = sample_end - sample_start
    logging.info('Loaded %d grasps' % (len(grasps)))
    logging.info('Grasp candidate loading took %f sec' % (sample_duration))

    if not grasps:
        logging.info('Skipping %s' % (obj.key))
        return None

    # load features for all grasps
    feature_start = time.clock()
    feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
    all_features = feature_loader.load_all_features(
        grasps)  # in same order as grasps
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' % (len(all_features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # bandit params (brute_force_iter and confidence are read but not used
    # below; the confidence condition is commented out)
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
        #        tc.ConfidenceTerminationCondition(confidence)
    ]

    # run bandits!
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    # only grasps with a feature vector become candidates
    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)

    # with grasp symmetry enabled, replace both structures by variants that
    # also take the swapped feature vector
    if config['grasp_symmetry']:

        def swapped_phi(rv):
            return rv.swapped_features

        nn = kernels.SymmetricKDTree(phi=phi, alternate_phi=swapped_phi)
        kernel = kernels.SymmetricSquaredExponentialKernel(
            sigma=config['kernel_sigma'],
            l=config['kernel_l'],
            phi=phi,
            alternate_phi=swapped_phi)
    objective = objectives.RandomBinaryObjective()

    # pre-computed pfc values
    estimated_pfc = np.array([c.grasp.quality for c in candidates])

    # uniform allocation baseline
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation.')
    ua_result = ua.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # Thompson sampling for faster convergence
    ts = das.ThompsonSampling(objective, candidates)
    logging.info('Running Thompson sampling.')
    ts_result = ts.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # correlated Thompson sampling for even faster convergence
    ts_corr = das.CorrelatedThompsonSampling(
        objective,
        candidates,
        nn,
        kernel,
        tolerance=config['kernel_tolerance'])
    logging.info('Running correlated Thompson sampling.')
    ts_corr_result = ts_corr.solve(
        termination_condition=tc.OrTerminationCondition(tc_list),
        snapshot_rate=snapshot_rate)

    # NOTE(review): the saved grasps come from the plain Thompson-sampling
    # result, not the correlated one -- confirm this is intended
    object_grasps = [candidates[i].grasp for i in ts_result.best_candidates]
    grasp_qualities = list(ts_result.best_pred_means)

    bandit_stop = time.clock()
    logging.info('Bandits took %f sec' % (bandit_stop - bandit_start))

    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
    #    grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])

    # delay is only consumed by the commented-out collision pruning below
    if config['vis_grasps']:
        delay = config['vis_delay']

    for grasp, grasp_quality in zip(object_grasps, grasp_qualities):
        rotated_grasps = grasp.transform(obj.tf, theta_res)
        #        rotated_grasps = grasp_checker.prune_grasps_in_collision(obj, rotated_grasps, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(rotated_grasps)
        # every rotated grasp inherits the quality of its source grasp
        pr2_grasp_qualities.extend([grasp_quality] * len(rotated_grasps))

    logging.info('Num grasps: %d' % (len(pr2_grasps)))

    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([
            g.to_json(quality=q)
            for g, q in zip(pr2_grasps, pr2_grasp_qualities)
        ], f)

    # reward curves of all three samplers, each computed with
    # reward_vs_iters against the pre-computed grasp qualities
    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(ua_normalized_reward,
                                            ts_normalized_reward,
                                            ts_corr_normalized_reward,
                                            estimated_pfc,
                                            ua_result.iters,
                                            kernel.matrix(candidates),
                                            obj_key=obj.key)
Exemplo n.º 17
0
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Compare plain and correlated Thompson sampling on one object's grasps.

    Despite its name, the active code path of this function writes no labels;
    the block that aggregated and saved the selected grasps is disabled (it is
    kept as a bare string at the end of the body). What actually runs is an
    interactive experiment: grasps are obtained, features are computed, both
    bandit variants are run ``config['num_trials']`` times, several matplotlib
    figures are drawn, and finally an IPython shell is opened.

    Args:
        obj: object to grasp. ``obj.key`` is used for logging; the object
            itself is passed to the samplers, feature extractor and random
            variables.
        chunk: ignored -- the name is rebound to ``db.Chunk(config)`` on entry.
        dest: forwarded to ``load_grasps`` / ``save_grasps``.
        config: mapping of experiment settings. Keys read here include
            'grasp_sampler', 'check_collisions', 'min_num_grasps',
            'bandit_brute_force_iter', 'bandit_max_iter', 'bandit_confidence',
            'bandit_snapshot_rate', 'friction_coef', 'sigma_mu',
            'kernel_sigma', 'kernel_l', 'kernel_tolerance', 'window_width',
            'window_steps', 'num_trials', 'line_width', 'font_size', 'dpi'.
        plot: forwarded as ``vis`` to the grasp samplers.
        load: if true, grasps come from ``load_grasps`` (first 20 only) and
            their ``quality`` attribute is used as the reference pfc. If
            false, grasps are sampled, uniform allocation estimates the pfc,
            the result is saved and shown as a histogram, and the process
            exits via ``exit(0)`` without reaching the bandit comparison.

    Returns:
        None. (Also None, early, when sampling produced no grasps.)
    """
    # only referenced by the disabled block at the end of the function
    bandit_start = time.clock()

    np.random.seed(100)
    # NOTE(review): this discards the caller's `chunk` argument; the new value
    # is itself only referenced from disabled code below -- confirm intent
    chunk = db.Chunk(config)

    if not load:
        # sample fresh grasps for the object (nothing is read back from disk
        # in this branch)
        sample_start = time.clock()

        if config['grasp_sampler'] == 'antipodal':
            logging.info('Using antipodal grasp sampling')
            sampler = ags.AntipodalGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj, check_collisions=config['check_collisions'], vis=plot)

            # pad with gaussian grasps until min_num_grasps is requested in total
            num_grasps = len(grasps)
            min_num_grasps = config['min_num_grasps']
            if num_grasps < min_num_grasps:
                target_num_grasps = min_num_grasps - num_grasps
                gaussian_sampler = gs.GaussianGraspSampler(config)
                gaussian_grasps = gaussian_sampler.generate_grasps(
                    obj,
                    target_num_grasps=target_num_grasps,
                    check_collisions=config['check_collisions'],
                    vis=plot)
                grasps.extend(gaussian_grasps)
        else:
            logging.info('Using Gaussian grasp sampling')
            sampler = gs.GaussianGraspSampler(config)
            grasps = sampler.generate_grasps(
                obj,
                check_collisions=config['check_collisions'],
                vis=plot,
                grasp_gen_mult=6)
        sample_end = time.clock()
        sample_duration = sample_end - sample_start
        logging.info('Loaded %d grasps' % (len(grasps)))
        logging.info('Grasp candidate loading took %f sec' % (sample_duration))

        # nothing to evaluate for this object
        if not grasps:
            logging.info('Skipping %s' % (obj.key))
            return None

    else:
        # reuse previously saved grasps, keeping only the first 20
        grasps = load_grasps(obj, dest)
        grasps = grasps[:20]

    # disabled alternative loader:
    #        grasps = chunk.load_grasps(obj.key)

    # compute features for all grasps (the log messages below say "loaded",
    # but the features are always recomputed here)
    feature_start = time.clock()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)

    features = feature_extractor.compute_all_features(grasps)
    # The bare string below is disabled code (a no-op expression statement):
    # it used to load precomputed features when `load` was set.
    """
    if not load:
        features = feature_extractor.compute_all_features(grasps)
    else:
        feature_loader = ff.GraspableFeatureLoader(obj, chunk.name, config)
        features = feature_loader.load_all_features(grasps) # in same order as grasps
    """
    feature_end = time.clock()
    feature_duration = feature_end - feature_start
    logging.info('Loaded %d features' % (len(features)))
    logging.info('Grasp feature loading took %f sec' % (feature_duration))

    # drop grasps whose features are None; afterwards `grasps` and
    # `all_features` are parallel lists of the survivors
    all_features = []
    all_grasps = []
    for grasp, feature in zip(grasps, features):
        if feature is not None:
            all_grasps.append(grasp)
            all_features.append(feature)
    grasps = all_grasps

    # compute pairwise feature distances for debugging; `distances` is not
    # read again in this function (only useful from the IPython shell)
    distances = np.zeros([len(grasps), len(grasps)])
    i = 0
    for feature_i in all_features:
        j = 0
        for feature_j in all_features:
            distances[i, j] = np.linalg.norm(feature_i.phi - feature_j.phi)
            j += 1
        i += 1

    # bandit params
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    confidence = config['bandit_confidence']  # read but not used below
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    # run bandits!
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    # wrap each surviving grasp in a force-closure random variable
    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        # NOTE(review): None features were already pruned above, so this
        # branch cannot be taken
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)

    # feature transform handed to the nearest-neighbour structure and kernel
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()

    if not load:
        # uniform allocation for true values
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running uniform allocation for true pfc.')
        ua_result = ua.solve(
            termination_condition=tc.MaxIterTerminationCondition(
                brute_force_iter),
            snapshot_rate=snapshot_rate)
        # beta_mean of the last model snapshot is taken as the reference pfc
        estimated_pfc = models.BetaBernoulliModel.beta_mean(
            ua_result.models[-1].alphas, ua_result.models[-1].betas)

        save_grasps(grasps, estimated_pfc, obj, dest)

        # plot params
        line_width = config['line_width']
        font_size = config['font_size']
        dpi = config['dpi']

        # plot histograms
        num_bins = 100
        bin_edges = np.linspace(0, 1, num_bins + 1)
        plt.figure()
        n, bins, patches = plt.hist(estimated_pfc, bin_edges)
        plt.xlabel('Probability of Success', fontsize=font_size)
        plt.ylabel('Num Grasps', fontsize=font_size)
        plt.title('Histogram of Grasps by Probability of Success',
                  fontsize=font_size)
        plt.show()

        # stops the whole run here: with load=False nothing below executes
        exit(0)
    else:
        # loaded grasps carry their previously estimated quality
        estimated_pfc = np.array([g.quality for g in grasps])

    # debugging for examining bad features: compares grasps 0 and 1, so at
    # least two grasps must have survived pruning or the indexing fails
    bad_i = 0
    bad_j = 1
    grasp_i = grasps[bad_i]
    grasp_j = grasps[bad_j]
    pfc_i = estimated_pfc[bad_i]
    pfc_j = estimated_pfc[bad_j]
    features_i = all_features[bad_i]
    features_j = all_features[bad_j]
    feature_sq_diff = (features_i.phi - features_j.phi)**2
    #    grasp_i.close_fingers(obj, vis=True)
    #    grasp_j.close_fingers(obj, vis=True)

    # return values are discarded; presumably called for a side effect
    # (e.g. visualisation or caching) -- TODO confirm
    grasp_i.surface_information(obj, config['window_width'],
                                config['window_steps'])
    grasp_j.surface_information(obj, config['window_width'],
                                config['window_steps'])

    # reshape four sub-extractor feature vectors into w-by-w images
    w = config['window_steps']
    wi1 = np.reshape(features_i.extractors_[0].extractors_[1].phi, [w, w])
    wi2 = np.reshape(features_i.extractors_[1].extractors_[1].phi, [w, w])
    wj1 = np.reshape(features_j.extractors_[0].extractors_[1].phi, [w, w])
    wj2 = np.reshape(features_j.extractors_[1].extractors_[1].phi, [w, w])

    # 2x2 grid of the four window images, all on the same [-a, a] colour scale
    a = 0.1
    plt.figure()
    plt.subplot(2, 2, 1)
    plt.imshow(wi1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi1')

    plt.subplot(2, 2, 2)
    plt.imshow(wi2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wi2')

    plt.subplot(2, 2, 3)
    plt.imshow(wj1, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj1')

    plt.subplot(2, 2, 4)
    plt.imshow(wj2, cmap=plt.cm.Greys, interpolation='none')
    plt.colorbar()
    plt.clim(-a, a)  # fixing color range for visual comparisons
    plt.title('wj2')

    #    plt.show()
    #    IPython.embed()

    # repeat both bandit variants num_trials times and collect their
    # reward curves
    num_trials = config['num_trials']
    ts_rewards = []
    ts_corr_rewards = []

    for t in range(num_trials):
        logging.info('Trial %d' % (t))

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = ts.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = ts_corr.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

        ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
        ts_corr_normalized_reward = reward_vs_iters(ts_corr_result,
                                                    estimated_pfc)

        ts_rewards.append(ts_normalized_reward)
        ts_corr_rewards.append(ts_corr_normalized_reward)

    # get the bandit rewards, averaged across trials
    all_ts_rewards = np.array(ts_rewards)
    all_ts_corr_rewards = np.array(ts_corr_rewards)
    avg_ts_rewards = np.mean(all_ts_rewards, axis=0)
    avg_ts_corr_rewards = np.mean(all_ts_corr_rewards, axis=0)

    # get correlations and plot: kernel value against pairwise pfc distance
    k = kernel.matrix(candidates)
    k_vec = k.ravel()
    pfc_arr = np.array([estimated_pfc]).T  # column vector for pdist
    pfc_diff = ssd.squareform(ssd.pdist(pfc_arr))
    pfc_vec = pfc_diff.ravel()

    # computed for inspection only; not read again in this function
    bad_ind = np.where(pfc_diff > 1.0 - k)

    plt.figure()
    plt.scatter(k_vec, pfc_vec)
    plt.xlabel('Kernel', fontsize=15)
    plt.ylabel('PFC Diff', fontsize=15)
    plt.title('Correlations', fontsize=15)
    #    plt.show()

    #    IPython.embed()

    # plot params
    line_width = config['line_width']
    font_size = config['font_size']
    dpi = config['dpi']  # read but not used below

    # plot histograms
    num_bins = 100
    bin_edges = np.linspace(0, 1, num_bins + 1)
    plt.figure()
    n, bins, patches = plt.hist(estimated_pfc, bin_edges)
    plt.xlabel('Probability of Success', fontsize=font_size)
    plt.ylabel('Num Grasps', fontsize=font_size)
    plt.title('Histogram of Grasps by Probability of Success',
              fontsize=font_size)

    # plot the results; the x axes come from the LAST trial's results, so
    # ts_result / ts_corr_result are undefined here if num_trials is 0
    plt.figure()
    plt.plot(ts_result.iters,
             avg_ts_rewards,
             c=u'g',
             linewidth=line_width,
             label='Thompson Sampling (Uncorrelated)')
    plt.plot(ts_corr_result.iters,
             avg_ts_corr_rewards,
             c=u'r',
             linewidth=line_width,
             label='Thompson Sampling (Correlated)')

    plt.xlim(0, np.max(ts_result.iters))
    plt.ylim(0.5, 1)
    plt.xlabel('Iteration', fontsize=font_size)
    plt.ylabel('Normalized Probability of Force Closure', fontsize=font_size)
    plt.title('Avg Normalized PFC vs Iteration', fontsize=font_size)

    handles, labels = plt.gca().get_legend_handles_labels()
    plt.legend(handles, labels, loc='lower right')
    plt.show()

    # drop into an interactive shell so the locals above can be inspected
    IPython.embed()
    # The bare string below is disabled code (a no-op expression statement):
    # it used to aggregate the selected grasps, write them to
    # <dest>/<obj.key>.json and return a BanditCorrelatedExperimentResult.
    """
    # aggregate grasps
    object_grasps = [candidates[i].grasp for i in ts_result.best_candidates]
    grasp_qualities = list(ts_result.best_pred_means)

    bandit_stop = time.clock()
    logging.info('Bandits took %f sec' %(bandit_stop - bandit_start))

    # get rotated, translated versions of grasps
    delay = 0
    pr2_grasps = []
    pr2_grasp_qualities = []
    theta_res = config['grasp_theta_res'] * np.pi
#    grasp_checker = pgc.OpenRaveGraspChecker(view=config['vis_grasps'])

    if config['vis_grasps']:
        delay = config['vis_delay']

    for grasp, grasp_quality in zip(object_grasps, grasp_qualities):
        rotated_grasps = grasp.transform(obj.tf, theta_res)
#        rotated_grasps = grasp_checker.prune_grasps_in_collision(obj, rotated_grasps, auto_step=True, close_fingers=False, delay=delay)
        pr2_grasps.extend(rotated_grasps)
        pr2_grasp_qualities.extend([grasp_quality] * len(rotated_grasps))

    logging.info('Num grasps: %d' %(len(pr2_grasps)))

    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as f:
        jsons.dump([g.to_json(quality=q) for g, q in
                   zip(pr2_grasps, pr2_grasp_qualities)], f)

    ua_normalized_reward = reward_vs_iters(ua_result, estimated_pfc)
    ts_normalized_reward = reward_vs_iters(ts_result, estimated_pfc)
    ts_corr_normalized_reward = reward_vs_iters(ts_corr_result, estimated_pfc)

    return BanditCorrelatedExperimentResult(ua_normalized_reward, ts_normalized_reward, ts_corr_normalized_reward,
                                            ua_result, ts_result, ts_corr_result, obj_key=obj.key)
                                            """
    return None
Exemplo n.º 18
0
Arquivo: pfc.py Projeto: puneetp/GPIS
def test_antipodal_grasp_thompson():
    """Run uniform allocation and Thompson sampling on antipodal grasps.

    Loads the 'Co_clean' test object, samples antipodal grasps for it, wraps
    each grasp in a force-closure random variable and solves the resulting
    bandit problem twice: once with uniform allocation (whose final model is
    used as the reference means) and once with Thompson sampling. The two runs
    are then plotted; nothing is returned or asserted.
    """
    np.random.seed(100)

    #    h = plt.figure()
    #    ax = h.add_subplot(111, projection = '3d')

    # load object: signed distance field first, then the mesh
    sdf_path = 'data/test/sdf/Co_clean.sdf'
    sdf_data = sdf_file.SdfFile(sdf_path).read()

    mesh_path = 'data/test/meshes/Co_clean.obj'
    mesh_data = obj_file.ObjFile(mesh_path).read()

    graspable = go.GraspableObject3D(sdf_data,
                                     mesh=mesh_data,
                                     model_name=mesh_path)

    config = {
        'grasp_width': 0.1,
        'friction_coef': 0.5,
        'num_cone_faces': 8,
        'grasp_samples_per_surface_point': 4,
        'dir_prior': 1.0,
        'alpha_thresh_div': 32,
        'rho_thresh': 0.75,  # as pct of object max moment
        'vis_antipodal': False,
        'min_num_grasps': 20,
        'alpha_inc': 1.1,
        'rho_inc': 1.1,
        'sigma_mu': 0.1,
        'sigma_trans_grasp': 0.001,
        'sigma_rot_grasp': 0.1,
        'sigma_trans_obj': 0.001,
        'sigma_rot_obj': 0.1,
        'sigma_scale_obj': 0.1,
        'num_prealloc_obj_samples': 100,
        'num_prealloc_grasp_samples': 0,
        'min_num_collision_free_grasps': 10,
        'grasp_theta_res': 1
    }
    grasp_sampler = ags.AntipodalGraspSampler(config)

    # time the candidate generation; the two thresholds returned alongside
    # the grasps are not needed here
    tic = time.clock()
    grasps, _alpha_thresh, _rho_thresh = grasp_sampler.generate_grasps(
        graspable, vis=False)
    elapsed = time.clock() - tic
    logging.info('Antipodal grasp candidate generation took %f sec' % elapsed)

    # convert grasps to RVs for optimization
    graspable_rv = GraspableObjectGaussianPose(graspable, config)
    friction_rv = scipy.stats.norm(config['friction_coef'],
                                   config['sigma_mu'])
    candidates = [
        ForceClosureRV(ParallelJawGraspGaussian(grasp, config), graspable_rv,
                       friction_rv, config) for grasp in grasps
    ]

    objective = objectives.RandomBinaryObjective()

    # run bandits
    eps = 5e-4

    # uniform allocation stops on iteration count only
    #   (disabled extra condition: tc.ConfidenceTerminationCondition(eps))
    ua_stop = tc.OrTerminationCondition(
        [tc.MaxIterTerminationCondition(1000)])
    ua_result = das.UniformAllocationMean(objective, candidates).solve(
        termination_condition=ua_stop, snapshot_rate=100)
    logging.info('Uniform allocation took %f sec' % ua_result.total_time)

    # Thompson sampling may also stop early on the confidence condition
    ts_stop = tc.OrTerminationCondition([
        tc.MaxIterTerminationCondition(1000),
        tc.ConfidenceTerminationCondition(eps),
    ])
    ts_result = das.ThompsonSampling(objective, candidates).solve(
        termination_condition=ts_stop, snapshot_rate=100)
    logging.info('Thompson sampling took %f sec' % ts_result.total_time)

    # the final uniform-allocation model provides the reference means
    final_ua_model = ua_result.models[-1]
    true_means = models.BetaBernoulliModel.beta_mean(final_ua_model.alphas,
                                                     final_ua_model.betas)

    # plot results
    plt.figure()
    for result, color in ((ua_result, 'red'), (ts_result, 'blue')):
        plot_value_vs_time_beta_bernoulli(result, true_means, color=color)
    plt.show()

    pull_plots = (
        (ua_result, 'Observations Per Variable for Uniform allocation'),
        (ts_result, 'Observations Per Variable for Thompson sampling'),
    )
    for result, title in pull_plots:
        das.plot_num_pulls_beta_bernoulli(result)
        plt.title(title)

    plt.show()
Exemplo n.º 19
0
def extract_features(obj, dest, feature_dest, config):
    """Sample grasps for ``obj``, estimate their pfc and write both to JSON.

    Steps, in order: sample grasps (antipodal, padded with Gaussian samples,
    or Gaussian only, depending on ``config['grasp_sampler']``); compute
    features for every grasp; build a force-closure random variable for each
    grasp that has features; run uniform allocation for
    ``config['bandit_brute_force_iter']`` iterations; write the grasps with
    their estimated quality to ``<dest>/<obj.key>.json`` and the features to
    ``<feature_dest>/<obj.key>.json``.

    Args:
        obj: object to sample grasps on; ``obj.key`` names the output files.
        dest: directory receiving the grasp JSON file.
        feature_dest: directory receiving the feature JSON file; also passed
            to each feature's ``to_json``.
        config: mapping of experiment settings.

    Returns:
        None. Returns early, writing nothing, when no grasps were sampled.
    """
    # sample grasps
    sampling_tic = time.clock()
    if config['grasp_sampler'] != 'antipodal':
        logging.info('Using Gaussian grasp sampling')
        sampler = gs.GaussianGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'])
    else:
        logging.info('Using antipodal grasp sampling')
        sampler = ags.AntipodalGraspSampler(config)
        grasps = sampler.generate_grasps(
            obj, check_collisions=config['check_collisions'])

        # pad with gaussian grasps when the antipodal sampler came up short
        num_sampled = len(grasps)
        shortfall = config['min_num_grasps'] - num_sampled
        if shortfall > 0:
            gaussian_sampler = gs.GaussianGraspSampler(config)
            grasps.extend(
                gaussian_sampler.generate_grasps(
                    obj,
                    target_num_grasps=shortfall,
                    check_collisions=config['check_collisions']))

    sampling_time = time.clock() - sampling_tic
    logging.info('Grasp candidate generation took %f sec' % sampling_time)

    if not grasps or len(grasps) == 0:
        logging.info('Skipping %s' % obj.key)
        return

    # compute all features
    feature_tic = time.clock()
    extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = extractor.compute_all_features(grasps)
    feature_time = time.clock() - feature_tic
    logging.info('Feature extraction took %f sec' % feature_time)

    # generate pfc candidates
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    friction_rv = scipy.stats.norm(config['friction_coef'],
                                   config['sigma_mu'])
    num_valid = len(all_features) - all_features.count(None)
    logging.info('%d grasps, %d valid features', len(grasps), num_valid)

    candidates = []
    for grasp, grasp_features in zip(grasps, all_features):
        logging.info('Adding grasp %d candidate' % len(candidates))
        if grasp_features is not None:
            grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
            candidate = pfc.ForceClosureRV(grasp_rv, graspable_rv,
                                           friction_rv, config)
            candidate.set_features(grasp_features)
            candidates.append(candidate)
        else:
            # grasps without features are left out of the bandit problem
            logging.info('No features computed.')
    logging.info('%d candidates', len(candidates))

    # brute force with uniform allocation
    brute_force_iter = config['bandit_brute_force_iter']
    snapshot_rate = config['bandit_snapshot_rate']

    # feature transform; not referenced anywhere else in this function
    def phi(rv):
        return rv.features

    objective = objectives.RandomBinaryObjective()

    allocator = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    bandit_tic = time.clock()
    ua_result = allocator.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    bandit_time = time.clock() - bandit_tic
    logging.info('Uniform allocation (%d iters) took %f sec' %
                 (brute_force_iter, bandit_time))

    cand_grasps = [cand.grasp for cand in candidates]
    cand_features = [cand.features_ for cand in candidates]
    final_model = ua_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(
        final_model.alphas, final_model.betas)

    # sanity check; opens an interactive shell for inspection on mismatch
    if len(cand_grasps) != len(estimated_pfc):
        logging.warning(
            'Number of grasps does not match estimated pfc results.')
        IPython.embed()

    # write to file: one record per candidate grasp
    grasp_filename = os.path.join(dest, obj.key + '.json')
    with open(grasp_filename, 'w') as grasp_file:
        grasp_records = []
        for cand_grasp, quality, alpha, beta in zip(cand_grasps,
                                                    estimated_pfc,
                                                    final_model.alphas,
                                                    final_model.betas):
            grasp_records.append(
                cand_grasp.to_json(quality=quality,
                                   num_successes=alpha,
                                   num_failures=beta))
        jsons.dump(grasp_records, grasp_file)

    # HACK to make paths relative: rewrite every entry under 'w1' / 'w2' of
    # each feature record so it is relative to the parent of `dest`
    features_as_json = [feat.to_json(feature_dest) for feat in cand_features]
    output_dest = os.path.split(dest)[0]
    for record in features_as_json:
        payload = list(record.values())[0]
        for wname in ('w1', 'w2'):
            window_paths = payload[wname]
            for entry_key, entry_path in window_paths.items():
                window_paths[entry_key] = os.path.relpath(
                    entry_path, output_dest)  # relative to output_dest
    feature_filename = os.path.join(feature_dest, obj.key + '.json')
    with open(feature_filename, 'w') as feature_file:
        jsons.dump(features_as_json, feature_file)
Exemplo n.º 20
0
def eval_hyperparams(obj,
                     chunk,
                     config,
                     plot=False,
                     priors_dataset=None,
                     nearest_features_names=None):
    """Score the prior-computation hyperparameters on one object's grasps.

    Priors (alpha/beta pairs) are computed for the object's candidate grasps,
    converted to estimated pfc values, and compared against the precomputed
    ``grasp.quality`` of each candidate with several losses. Every loss array
    starts with a baseline entry for an uninformative prior (alpha = beta = 1,
    pfc = 0.5) followed by one entry per computed set of priors.

    Args:
        obj: object whose candidate grasps are evaluated; ``obj.key`` is
            stored on the result.
        chunk: dataset handed to ``load_candidate_grasps`` and, unless
            ``priors_dataset`` is given, used as the source of priors.
        config: mapping of experiment settings; 'weight_grad_x',
            'weight_gravity', 'prior_neighbor_weight' and
            'prior_num_neighbors' are recorded in the result.
        plot: unused; kept for interface compatibility.
        priors_dataset: dataset to compute priors from; defaults to ``chunk``.
        nearest_features_names: optional iterable of feature-set names. When
            given, priors are computed (and timed) once per name; otherwise a
            single set of priors is computed with the engine's defaults.

    Returns:
        A ``HyperparamEvalResult``, or None when no candidates could be
        loaded. Its ``neighbor_keys`` come from the last named feature set and
        are None when no named feature set was evaluated.
    """
    np.random.seed(100)

    candidates = load_candidate_grasps(obj, chunk)
    if candidates is None:
        return None

    # compute priors
    logging.info('Computing priors')
    if priors_dataset is None:
        priors_dataset = chunk
    prior_engine = pce.PriorComputationEngine(priors_dataset, config)

    all_alpha_priors = []
    all_beta_priors = []
    prior_comp_times = []
    # Only the named-feature path yields neighbor keys. Bind the name up
    # front so the default path (and an empty name list) no longer fails with
    # UnboundLocalError when the result is built.
    neighbor_keys = None
    if nearest_features_names is None:
        alpha_priors, beta_priors = prior_engine.compute_priors(
            obj, candidates)
        all_alpha_priors.append(alpha_priors)
        all_beta_priors.append(beta_priors)
    else:
        for nearest_features_name in nearest_features_names:
            logging.info('Computing priors using %s' % (nearest_features_name))
            priors_start_time = time.time()
            # only the priors and the neighbor keys are used below; the other
            # returned values are unpacked to keep the expected arity explicit
            (alpha_priors, beta_priors, neighbor_keys, neighbor_distances,
             neighbor_kernels, neighbor_pfc_diffs,
             num_grasp_neighbors) = prior_engine.compute_priors(
                 obj, candidates, nearest_features_name=nearest_features_name)

            all_alpha_priors.append(alpha_priors)
            all_beta_priors.append(beta_priors)
            priors_end_time = time.time()
            prior_comp_times.append(priors_end_time - priors_start_time)
            logging.info(
                'Priors for %s took %f' %
                (nearest_features_name, priors_end_time - priors_start_time))

    # pre-computed pfc values
    logging.info('Computing regression errors')
    true_pfc = np.array([c.grasp.quality for c in candidates])
    prior_alphas = np.ones(true_pfc.shape)
    prior_betas = np.ones(true_pfc.shape)
    prior_pfc = 0.5 * np.ones(true_pfc.shape)

    ce_loss = objectives.CrossEntropyLoss(true_pfc)
    se_loss = objectives.SquaredErrorLoss(true_pfc)
    we_loss = objectives.WeightedSquaredErrorLoss(true_pfc)
    ccbp_ll = objectives.CCBPLogLikelihood(true_pfc)

    # baseline entries for the uninformative prior
    ce_vals = [ce_loss(prior_pfc)]
    se_vals = [se_loss(prior_pfc)]
    we_vals = [se_loss(prior_pfc)]  # uniform weights at first
    ccbp_vals = [ccbp_ll.evaluate(prior_alphas, prior_betas)]
    total_weights = [len(candidates)]

    # compute estimated pfc values from alphas and betas
    for alpha_prior, beta_prior in zip(all_alpha_priors, all_beta_priors):
        alphas = np.array(alpha_prior)
        betas = np.array(beta_prior)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(alphas, betas)
        estimated_vars = models.BetaBernoulliModel.beta_variance(
            alphas, betas)

        # compute losses
        ce_vals.append(ce_loss(estimated_pfc))
        se_vals.append(se_loss(estimated_pfc))
        we_vals.append(we_loss.evaluate(estimated_pfc, estimated_vars))
        ccbp_vals.append(ccbp_ll.evaluate(alphas, betas))
        total_weights.append(np.sum(estimated_vars))

    ce_vals = np.array(ce_vals)
    se_vals = np.array(se_vals)
    we_vals = np.array(we_vals)
    ccbp_vals = np.array(ccbp_vals)
    total_weights = np.array(total_weights)

    # create hyperparam dict
    num_grasps = len(candidates)
    hyperparams = {
        'weight_grad': config['weight_grad_x'],
        'weight_moment': config['weight_gravity'],
        'weight_shape': config['prior_neighbor_weight'],
        'num_neighbors': config['prior_num_neighbors'],
    }
    return HyperparamEvalResult(ce_vals,
                                se_vals,
                                we_vals,
                                ccbp_vals,
                                num_grasps,
                                total_weights,
                                hyperparams,
                                prior_comp_times,
                                obj_key=obj.key,
                                neighbor_keys=neighbor_keys)
def test_window_correlation(width, num_steps, vis=True):
    """Print estimated pfc and the kernel matrix for four stacked grasps.

    Builds four parallel-jaw grasps on the 'Co_clean' test object that differ
    only in the z coordinate of their center, computes window features with
    the given window settings, estimates each grasp's pfc with 100 iterations
    of uniform allocation, and prints both the estimates and the squared
    exponential kernel matrix between the grasps.

    Args:
        width: value stored in ``config['window_width']``.
        num_steps: value stored in ``config['window_steps']``.
        vis: when true, plot the friction cones of every grasp and show the
            figures at the end.
    """
    # scipy.stats must be imported explicitly: a bare `import scipy` does not
    # guarantee that the stats subpackage is loaded
    import scipy.stats
    import sdf_file
    import obj_file
    import discrete_adaptive_samplers as das
    import experiment_config as ec
    import feature_functions as ff
    import graspable_object as go  # weird Python issues
    import kernels
    import models
    import objectives
    import pfc
    import termination_conditions as tc

    np.random.seed(100)

    mesh_file_name = 'data/test/meshes/Co_clean.obj'
    sdf_3d_file_name = 'data/test/sdf/Co_clean.sdf'

    config = ec.ExperimentConfig('cfg/correlated.yaml')
    config['window_width'] = width
    config['window_steps'] = num_steps
    brute_force_iter = 100
    snapshot_rate = config['bandit_snapshot_rate']

    sdf = sdf_file.SdfFile(sdf_3d_file_name).read()
    mesh = obj_file.ObjFile(mesh_file_name).read()
    graspable = go.GraspableObject3D(sdf, mesh)
    grasp_axis = np.array([0, 1, 0])
    grasp_width = 0.1

    # same axis and width for every grasp; only the center height changes
    grasps = []
    for z in [-0.030, -0.035, -0.040, -0.045]:
        grasp_center = np.array([0, 0, z])
        # reach the class through `g` for both the constructor and the
        # configuration helper, instead of relying on a bare class name
        grasp = g.ParallelJawPtGrasp3D(
            g.ParallelJawPtGrasp3D.configuration_from_params(
                grasp_center, grasp_axis, grasp_width))
        grasps.append(grasp)

    graspable_rv = pfc.GraspableObjectGaussianPose(graspable, config)
    f_rv = scipy.stats.norm(config['friction_coef'],
                            config['sigma_mu'])  # friction Gaussian RV

    # compute feature vectors for all grasps
    feature_extractor = ff.GraspableFeatureExtractor(graspable, config)
    all_features = feature_extractor.compute_all_features(grasps)

    candidates = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' % len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        pfc_rv.set_features(features)
        candidates.append(pfc_rv)

        if vis:
            # one figure per grasp showing the friction cones at both contacts
            _, (c1, c2) = grasp.close_fingers(graspable)
            plt.figure()
            c1_proxy = c1.plot_friction_cone(color='m')
            c2_proxy = c2.plot_friction_cone(color='y')
            plt.legend([c1_proxy, c2_proxy], ['Cone 1', 'Cone 2'])
            plt.title('Grasp %d' % (len(candidates)))

    objective = objectives.RandomBinaryObjective()
    ua = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    ua_result = ua.solve(
        termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
        snapshot_rate=snapshot_rate)
    estimated_pfc = models.BetaBernoulliModel.beta_mean(
        ua_result.models[-1].alphas, ua_result.models[-1].betas)

    # single-argument print(...) prints the same text under Python 2 and is
    # also valid Python 3
    print('true pfc')
    print(estimated_pfc)

    def phi(rv):
        return rv.features

    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)

    print('kernel matrix')
    print(kernel.matrix(candidates))

    if vis:
        plt.show()
Exemplo n.º 22
0
def label_correlated(obj,
                     chunk,
                     config,
                     plot=False,
                     priors_dataset=None,
                     nearest_features_names=None):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits.

    Every trial runs uniform allocation, Thompson sampling, Gittins indices,
    correlated Thompson sampling and correlated Bayes UCB on the candidate
    grasps, plus one correlated Thompson sampling run and one correlated
    Bayes UCB run per set of priors. Normalized rewards and runtimes are
    averaged over the trials.

    Params:
        obj - object to label; forwarded to load_candidate_grasps and to the
            prior engine, and its `key` is stored in the result
        chunk - dataset forwarded to load_candidate_grasps; also used as the
            priors dataset when `priors_dataset` is None
        config - experiment configuration, indexed by string keys
        plot - unused
        priors_dataset - dataset handed to the prior computation engine
            (defaults to `chunk`)
        nearest_features_names - iterable of feature-set names; one set of
            priors is computed per name. When None, a single set of priors
            is computed without naming a feature set.

    Returns:
        BanditCorrelatedPriorExperimentResult, or None when no candidates
        could be loaded or no trials were requested.
    """
    # bandit params
    num_trials = config['num_trials']
    max_iter = config['bandit_max_iter']
    snapshot_rate = config['bandit_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
    ]

    def run_policy(policy):
        # a fresh termination condition object is built for every run
        return policy.solve(
            termination_condition=tc.OrTerminationCondition(tc_list),
            snapshot_rate=snapshot_rate)

    np.random.seed(100)

    candidates = load_candidate_grasps(obj, chunk)
    if candidates is None:
        return None

    # feature transform
    def phi(rv):
        return rv.features

    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(sigma=config['kernel_sigma'],
                                              l=config['kernel_l'],
                                              phi=phi)
    objective = objectives.RandomBinaryObjective()

    # compute priors
    logging.info('Computing priors')
    if priors_dataset is None:
        priors_dataset = chunk
    prior_engine = pce.PriorComputationEngine(priors_dataset, config)

    all_alpha_priors = []
    all_beta_priors = []
    prior_comp_times = []
    # Empty unless a named feature set supplies neighbor keys; matches the
    # empty lists passed for the other neighbor fields of the result.
    neighbor_keys = []
    if nearest_features_names is None:
        # One unnamed set of priors. It still needs a label so that it can
        # be paired with its reward / runtime buffers in the trial loop.
        prior_names = ['default']
        alpha_priors, beta_priors = prior_engine.compute_priors(
            obj, candidates)
        all_alpha_priors.append(alpha_priors)
        all_beta_priors.append(beta_priors)
    else:
        prior_names = list(nearest_features_names)
        for prior_name in prior_names:
            logging.info('Computing priors using %s' % (prior_name))
            priors_start_time = time.time()
            # neighbor_keys keeps the value of the last feature set
            (alpha_priors, beta_priors, neighbor_keys, _, _, _, _) = \
                prior_engine.compute_priors(
                    obj, candidates, nearest_features_name=prior_name)

            all_alpha_priors.append(alpha_priors)
            all_beta_priors.append(beta_priors)
            priors_end_time = time.time()
            prior_comp_times.append(priors_end_time - priors_start_time)
            logging.info(
                'Priors for %s took %f' %
                (prior_name, priors_end_time - priors_start_time))

    # pre-computed pfc values
    logging.info('Computing regression errors')
    true_pfc = np.array([c.grasp.quality for c in candidates])
    prior_alphas = np.ones(true_pfc.shape)
    prior_betas = np.ones(true_pfc.shape)
    prior_pfc = 0.5 * np.ones(true_pfc.shape)

    ce_loss = objectives.CrossEntropyLoss(true_pfc)
    se_loss = objectives.SquaredErrorLoss(true_pfc)
    we_loss = objectives.WeightedSquaredErrorLoss(true_pfc)
    ccbp_ll = objectives.CCBPLogLikelihood(true_pfc)
    ce_vals = [ce_loss(prior_pfc)]
    se_vals = [se_loss(prior_pfc)]
    we_vals = [se_loss(prior_pfc)]  # uniform weights at first
    ccbp_vals = [ccbp_ll.evaluate(prior_alphas, prior_betas)]
    total_weights = [len(candidates)]

    # compute estimated pfc values from alphas and betas
    for alpha_prior, beta_prior in zip(all_alpha_priors, all_beta_priors):
        alpha_arr = np.array(alpha_prior)
        beta_arr = np.array(beta_prior)
        estimated_pfc = models.BetaBernoulliModel.beta_mean(
            alpha_arr, beta_arr)
        estimated_vars = models.BetaBernoulliModel.beta_variance(
            alpha_arr, beta_arr)

        # compute losses
        ce_vals.append(ce_loss(estimated_pfc))
        se_vals.append(se_loss(estimated_pfc))
        we_vals.append(we_loss.evaluate(estimated_pfc, estimated_vars))
        ccbp_vals.append(ccbp_ll.evaluate(alpha_arr, beta_arr))
        total_weights.append(np.sum(estimated_vars))

    ce_vals = np.array(ce_vals)
    se_vals = np.array(se_vals)
    we_vals = np.array(we_vals)
    ccbp_vals = np.array(ccbp_vals)
    total_weights = np.array(total_weights)

    num_priors = len(all_alpha_priors)

    # setup reward buffers (one list per set of priors for the prior runs)
    ua_rewards = []
    ts_rewards = []
    gi_rewards = []
    ts_corr_rewards = []
    bucb_corr_rewards = []
    all_ts_corr_prior_rewards = [[] for _ in range(num_priors)]
    all_bucb_corr_prior_rewards = [[] for _ in range(num_priors)]

    # setup runtime buffers
    ua_runtimes = []
    ts_runtimes = []
    gi_runtimes = []
    ts_corr_runtimes = []
    bucb_corr_runtimes = []
    all_ts_corr_prior_runtimes = [[] for _ in range(num_priors)]
    all_bucb_corr_prior_runtimes = [[] for _ in range(num_priors)]

    # run bandits for several trials
    logging.info('Running bandits')
    for t in range(num_trials):
        logging.info('Trial %d' % (t))

        # Uniform sampling
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running Uniform allocation.')
        ua_result = run_policy(ua)

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = run_policy(ts)

        # Gittins indices
        gi = das.GittinsIndex98(objective, candidates)
        logging.info('Running Gittins Indices.')
        gi_result = run_policy(gi)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'],
            p=config['lb_alpha'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = run_policy(ts_corr)

        # correlated Bayes UCB (no priors)
        bucb_corr = das.CorrelatedGittins(
            objective,
            candidates,
            nn,
            kernel,
            tolerance=config['kernel_tolerance'],
            p=config['lb_alpha'])  #horizon=max_iter)
        logging.info('Running correlated Bayes UCB.')
        bucb_corr_result = run_policy(bucb_corr)

        # correlated MAB with priors for faster convergence.
        # The loop targets deliberately use names distinct from the
        # no-prior buffers (ts_corr_runtimes, bucb_corr_runtimes) so those
        # buffers are not rebound to a per-prior list.
        all_ts_corr_prior_ind = []
        all_bucb_corr_prior_ind = []
        prior_runs = zip(all_alpha_priors, all_beta_priors,
                         all_ts_corr_prior_rewards,
                         all_bucb_corr_prior_rewards,
                         all_ts_corr_prior_runtimes,
                         all_bucb_corr_prior_runtimes,
                         prior_names)
        for (alpha_priors, beta_priors, prior_ts_rewards, prior_bucb_rewards,
             prior_ts_runtimes, prior_bucb_runtimes,
             prior_name) in prior_runs:
            # thompson sampling
            ts_corr_prior = das.CorrelatedThompsonSampling(
                objective,
                candidates,
                nn,
                kernel,
                tolerance=config['kernel_tolerance'],
                alpha_prior=alpha_priors,
                beta_prior=beta_priors,
                p=config['lb_alpha'])
            logging.info(
                'Running correlated Thompson sampling with priors from %s' %
                (prior_name))
            ts_corr_prior_result = run_policy(ts_corr_prior)
            prior_ts_rewards.append(
                reward_vs_iters(ts_corr_prior_result, true_pfc))
            prior_ts_runtimes.append(ts_corr_prior_result.total_time)
            all_ts_corr_prior_ind.append(ts_corr_prior_result.best_pred_ind)

            # bayes ucb
            bucb_corr_prior = das.CorrelatedGittins(
                objective,
                candidates,
                nn,
                kernel,
                tolerance=config['kernel_tolerance'],  #horizon=max_iter,
                alpha_prior=alpha_priors,
                beta_prior=beta_priors,
                p=config['lb_alpha'])
            logging.info('Running correlated Bayes UCB with priors from %s' %
                         (prior_name))
            bucb_corr_prior_result = run_policy(bucb_corr_prior)
            prior_bucb_rewards.append(
                reward_vs_iters(bucb_corr_prior_result, true_pfc))
            prior_bucb_runtimes.append(bucb_corr_prior_result.total_time)
            all_bucb_corr_prior_ind.append(
                bucb_corr_prior_result.best_pred_ind)

        # compile results
        ua_rewards.append(reward_vs_iters(ua_result, true_pfc))
        ts_rewards.append(reward_vs_iters(ts_result, true_pfc))
        gi_rewards.append(reward_vs_iters(gi_result, true_pfc))
        ts_corr_rewards.append(reward_vs_iters(ts_corr_result, true_pfc))
        bucb_corr_rewards.append(reward_vs_iters(bucb_corr_result, true_pfc))

        ua_runtimes.append(ua_result.total_time)
        ts_runtimes.append(ts_result.total_time)
        gi_runtimes.append(gi_result.total_time)
        ts_corr_runtimes.append(ts_corr_result.total_time)
        bucb_corr_runtimes.append(bucb_corr_result.total_time)

    # Nothing was run, so there are no results to average or report.
    if num_trials <= 0:
        return None

    # average the per-prior rewards over the trials
    all_avg_ts_corr_prior_rewards = [
        np.mean(np.array(rewards), axis=0)
        for rewards in all_ts_corr_prior_rewards
    ]
    all_avg_bucb_corr_prior_rewards = [
        np.mean(np.array(rewards), axis=0)
        for rewards in all_bucb_corr_prior_rewards
    ]

    # get bandit indices (from the last trial)
    ua_ind = ua_result.best_pred_ind
    ts_ind = ts_result.best_pred_ind
    ts_corr_ind = ts_corr_result.best_pred_ind
    bucb_corr_ind = bucb_corr_result.best_pred_ind

    # compute avg normalized rewards
    avg_ua_rewards = np.mean(np.array(ua_rewards), axis=0)
    avg_ts_rewards = np.mean(np.array(ts_rewards), axis=0)
    avg_gi_rewards = np.mean(np.array(gi_rewards), axis=0)
    avg_ts_corr_rewards = np.mean(np.array(ts_corr_rewards), axis=0)
    avg_bucb_corr_rewards = np.mean(np.array(bucb_corr_rewards), axis=0)

    # compute avg runtimes
    avg_ua_runtimes = np.mean(np.array(ua_runtimes), axis=0)
    avg_ts_runtimes = np.mean(np.array(ts_runtimes), axis=0)
    avg_ts_corr_runtimes = np.mean(np.array(ts_corr_runtimes), axis=0)
    avg_bucb_corr_runtimes = np.mean(np.array(bucb_corr_runtimes), axis=0)
    all_avg_ts_corr_prior_runtimes = [
        np.mean(np.array(runtimes), axis=0)
        for runtimes in all_ts_corr_prior_runtimes
    ]
    all_avg_bucb_corr_prior_runtimes = [
        np.mean(np.array(runtimes), axis=0)
        for runtimes in all_bucb_corr_prior_runtimes
    ]

    # kernel matrix
    kernel_matrix = kernel.matrix(candidates)

    return BanditCorrelatedPriorExperimentResult(
        avg_ua_rewards,
        avg_ts_rewards,
        avg_gi_rewards,
        avg_ts_corr_rewards,
        avg_bucb_corr_rewards,
        all_avg_ts_corr_prior_rewards,
        all_avg_bucb_corr_prior_rewards,
        true_pfc,
        ua_result.iters,
        kernel_matrix, [], [], [],
        ce_vals,
        ccbp_vals,
        we_vals,
        len(candidates),
        total_weights,
        ua_ind,
        ts_ind,
        ts_corr_ind,
        bucb_corr_ind,
        all_ts_corr_prior_ind,
        all_bucb_corr_prior_ind,
        avg_ua_runtimes,
        avg_ts_runtimes,
        avg_ts_corr_runtimes,
        avg_bucb_corr_runtimes,
        all_avg_ts_corr_prior_runtimes,
        all_avg_bucb_corr_prior_runtimes,
        prior_comp_times,
        obj_key=obj.key,
        neighbor_keys=neighbor_keys)
Exemplo n.º 23
0
        grasps, data = load_data('grasp_features.hdf5', config)
    successes = np.array([g.successes for g in grasps]) - 1 # subtract alpha0
    failures = np.array([g.failures for g in grasps]) - 1 # subtract beta0

    loss = StochasticGraspWeightObjective(data, successes, failures, config)
    objective = MinimizationObjective(loss)
    step_policy = ilo.LogStepPolicy(config['step_size_max'], config['step_size_period'])
    def positive_constraint(x):
        """Zero out the negative entries of x in place and return x."""
        negative_mask = x < 0
        x[negative_mask] = 0
        return x
    optimizer = ilo.ConstrainedGradientAscent(objective, step_policy,
                                              [positive_constraint])
    start = config['weight_initial'] * np.ones(2 * config['window_steps']**2)

    logging.info('Starting optimization.')
    result = optimizer.solve(termination_condition=tc.MaxIterTerminationCondition(config['max_iters']),
                             snapshot_rate=config['snapshot_rate'], start_x=start, true_x=None)

    proj_win_weight = result.best_x
    max_weight = np.max(proj_win_weight)
    opt_weights = proj_win_weight.reshape((2, config['window_steps'], config['window_steps']))
    rand_weights = start.reshape((2, config['window_steps'], config['window_steps']))

    logging.info('Loss: %f to %f, delta=%f', loss(start), loss(result.best_x), np.linalg.norm(start - result.best_x))

    # debugging stuff

    def min_and_max(arr):
        """Return the smallest and largest value of arr as a pair."""
        lowest = np.min(arr)
        highest = np.max(arr)
        return lowest, highest

    ground_truth = loss.mu_
Exemplo n.º 24
0
def label_correlated(obj, chunk, dest, config, plot=False, load=True):
    """Label an object with grasps according to probability of force closure,
    using correlated bandits.

    Grasps are sampled on the object, a brute-force uniform allocation run
    estimates their probability of force closure, and those estimates are
    saved through save_grasps. Uniform allocation, Thompson sampling and
    correlated Thompson sampling are then run for config['num_trials']
    trials and their normalized rewards are averaged.

    Params:
        obj - object to sample and label grasps on
        chunk - unused
        dest - destination forwarded to save_grasps
        config - experiment configuration, indexed by string keys
        plot - forwarded as `vis` to the Gaussian grasp sampler
        load - unused

    Returns:
        BanditCorrelatedExperimentResult, or None when no grasps were
        sampled, no grasp has features, or no trials were requested.
    """
    # sample grasps
    # time.time() is used for timing: time.clock() was removed in Python 3.8
    sample_start = time.time()

    if config['grasp_sampler'] == 'antipodal':
        logging.info('Using antipodal grasp sampling')
        sampler = ags.AntipodalGraspSampler(config)
        grasps = sampler.generate_grasps(obj, check_collisions=config['check_collisions'], vis=False)

        # pad with gaussian grasps
        num_grasps = len(grasps)
        min_num_grasps = config['min_num_grasps']
        if num_grasps < min_num_grasps:
            target_num_grasps = min_num_grasps - num_grasps
            gaussian_sampler = gs.GaussianGraspSampler(config)
            gaussian_grasps = gaussian_sampler.generate_grasps(obj, target_num_grasps=target_num_grasps,
                                                               check_collisions=config['check_collisions'], vis=plot)
            grasps.extend(gaussian_grasps)
    else:
        logging.info('Using Gaussian grasp sampling')
        sampler = gs.GaussianGraspSampler(config)
        grasps = sampler.generate_grasps(obj, check_collisions=config['check_collisions'], vis=plot,
                                         grasp_gen_mult = 6)
    sample_duration = time.time() - sample_start
    logging.info('Loaded %d grasps' %(len(grasps)))
    logging.info('Grasp candidate loading took %f sec' %(sample_duration))

    if not grasps:
        logging.info('Skipping %s' %(obj.key))
        return None

    # extract load features for all grasps
    feature_start = time.time()
    feature_extractor = ff.GraspableFeatureExtractor(obj, config)
    all_features = feature_extractor.compute_all_features(grasps)
    feature_duration = time.time() - feature_start
    logging.info('Loaded %d features' %(len(all_features)))
    logging.info('Grasp feature loading took %f sec' %(feature_duration))

    # bandit params
    num_trials = config['num_trials']
    brute_force_iter = config['bandit_brute_force_iter']
    max_iter = config['bandit_max_iter']
    snapshot_rate = config['bandit_snapshot_rate']
    brute_snapshot_rate = config['bandit_brute_force_snapshot_rate']
    tc_list = [
        tc.MaxIterTerminationCondition(max_iter),
        ]

    def run_policy(policy):
        # a fresh termination condition object is built for every run
        return policy.solve(termination_condition=tc.OrTerminationCondition(tc_list),
                            snapshot_rate=snapshot_rate)

    # set up random variables
    graspable_rv = pfc.GraspableObjectGaussianPose(obj, config)
    f_rv = scipy.stats.norm(config['friction_coef'], config['sigma_mu']) # friction Gaussian RV

    # candidate_grasps[i] is the grasp behind candidates[i]. Grasps without
    # features are dropped from both lists so they stay index-aligned with
    # the per-candidate estimates computed below.
    candidates = []
    candidate_grasps = []
    for grasp, features in zip(grasps, all_features):
        logging.info('Adding grasp %d' %len(candidates))
        grasp_rv = pfc.ParallelJawGraspGaussian(grasp, config)
        pfc_rv = pfc.ForceClosureRV(grasp_rv, graspable_rv, f_rv, config)
        if features is None:
            logging.info('Could not compute features for grasp.')
        else:
            pfc_rv.set_features(features)
            candidates.append(pfc_rv)
            candidate_grasps.append(grasp)

    if not candidates:
        logging.info('No grasps with features for %s, skipping' %(obj.key))
        return None

    # feature transform
    def phi(rv):
        return rv.features

    # create nn structs for kernels
    nn = kernels.KDTree(phi=phi)
    kernel = kernels.SquaredExponentialKernel(
        sigma=config['kernel_sigma'], l=config['kernel_l'], phi=phi)
    objective = objectives.RandomBinaryObjective()

    # uniform allocation for true values
    ua_brute = das.UniformAllocationMean(objective, candidates)
    logging.info('Running uniform allocation for true pfc.')
    ua_brute_result = ua_brute.solve(termination_condition=tc.MaxIterTerminationCondition(brute_force_iter),
                                     snapshot_rate=brute_snapshot_rate)
    final_model = ua_brute_result.models[-1]
    estimated_pfc = models.BetaBernoulliModel.beta_mean(final_model.alphas, final_model.betas)
    # Save only the grasps that became candidates: estimated_pfc and the
    # model's alphas / betas have one entry per candidate, not per grasp.
    # NOTE(review): presumes save_grasps pairs grasps with values by position.
    save_grasps(candidate_grasps, estimated_pfc, obj, dest,
                num_successes=final_model.alphas, num_failures=final_model.betas)

    # Without trials there are no bandit results to average or report.
    if num_trials <= 0:
        return None

    # run bandits for several trials
    ua_rewards = []
    ts_rewards = []
    ts_corr_rewards = []

    for t in range(num_trials):
        logging.info('Trial %d' %(t))

        # Uniform sampling
        ua = das.UniformAllocationMean(objective, candidates)
        logging.info('Running Uniform allocation.')
        ua_result = run_policy(ua)

        # Thompson sampling
        ts = das.ThompsonSampling(objective, candidates)
        logging.info('Running Thompson sampling.')
        ts_result = run_policy(ts)

        # correlated Thompson sampling for even faster convergence
        ts_corr = das.CorrelatedThompsonSampling(
            objective, candidates, nn, kernel, tolerance=config['kernel_tolerance'])
        logging.info('Running correlated Thompson sampling.')
        ts_corr_result = run_policy(ts_corr)

        # compile results
        ua_rewards.append(reward_vs_iters(ua_result, estimated_pfc))
        ts_rewards.append(reward_vs_iters(ts_result, estimated_pfc))
        ts_corr_rewards.append(reward_vs_iters(ts_corr_result, estimated_pfc))

    # compute avg normalized rewards over the trials
    avg_ua_rewards = np.mean(np.array(ua_rewards), axis=0)
    avg_ts_rewards = np.mean(np.array(ts_rewards), axis=0)
    avg_ts_corr_rewards = np.mean(np.array(ts_corr_rewards), axis=0)

    # kernel matrix
    kernel_matrix = kernel.matrix(candidates)

    return BanditCorrelatedExperimentResult(avg_ua_rewards, avg_ts_rewards, avg_ts_corr_rewards,
                                            estimated_pfc, ua_result.iters, kernel_matrix, obj_key=obj.key)