def main():
    '''Builds a data set of cylinder point clouds.

    Repeats nObjects times: place a cylinder through the environment, request the full cloud
    and normals from the agent, write them to "cylinder-<index>.mat", optionally visualize,
    and finally remove the cylinder again.
    '''

    # PARAMETERS =====================================================================================

    # system
    gpuId = 0

    # objects (two-element lists forwarded to PlaceCylinderAtOrigin; presumably [min, max]
    # sampling ranges -- TODO confirm against RlEnvironment)
    nObjects = 1000
    cylinderRadius = [0.030, 0.045]
    cylinderHeight = [0.007, 0.013]

    # view
    axisLimits = (-1.0, 1.0)
    viewWorkspace = [axisLimits, axisLimits, axisLimits]
    viewKeepout = 0.60
    viewCenter = array([0, 0, 0])

    # visualization/saving
    plotImages = False
    showSteps = False
    showViewer = False

    # INITIALIZATION =================================================================================

    rlEnv = RlEnvironment(showViewer, removeTable=True)
    rlAgent = RlAgent(rlEnv, gpuId)

    # RUN TEST =======================================================================================

    for objIdx in xrange(nObjects):

        # names derived from the running object index
        objectName = "cylinder-{}".format(objIdx)
        matFileName = "cylinder-{}.mat".format(objIdx)

        # place the cylinder, sense it, and store the result
        cylinder = rlEnv.PlaceCylinderAtOrigin(
            cylinderHeight, cylinderRadius, objectName, True)
        cloud, normals = rlAgent.GetFullCloudAndNormals(
            viewCenter, viewKeepout, viewWorkspace)
        point_cloud.SaveMat(matFileName, cloud, normals)

        # visualization (the cloud is always handed to the agent for plotting)
        rlAgent.PlotCloud(cloud)
        if plotImages:
            point_cloud.Plot(cloud, normals, 2)

        # optional pause so each placement can be inspected
        if showSteps:
            raw_input("Placed cylinder-{}.".format(objIdx))

        # cleanup before the next object
        rlEnv.RemoveObjectSet([cylinder])
示例#2
0
def main():
    '''Builds a data set of point clouds for 3DNet "plate" models.

    Repeats nObjects times: place an object from the model directory through the environment,
    request the full cloud and normals from the agent, write them to "plate-<index>.mat",
    optionally visualize, and finally remove the object again.
    '''

    # PARAMETERS =====================================================================================

    # system
    gpuId = 0

    # objects (objectScale is forwarded to Place3DNetObjectAtOrigin; presumably a [min, max]
    # sampling range -- TODO confirm against RlEnvironment)
    directory = "/home/mgualti/Data/3DNet/Cat200_ModelDatabase/plate/"
    nObjects = 1000
    objectScale = [0.09, 0.17]

    # view
    axisLimits = (-1.0, 1.0)
    viewWorkspace = [axisLimits, axisLimits, axisLimits]
    viewKeepout = 0.60
    viewCenter = array([0, 0, 0])

    # visualization/saving
    plotImages = False
    showSteps = False
    showViewer = False

    # INITIALIZATION =================================================================================

    rlEnv = RlEnvironment(showViewer, removeTable=True)
    rlAgent = RlAgent(rlEnv, gpuId)

    # RUN TEST =======================================================================================

    for objIdx in xrange(nObjects):

        # names derived from the running object index
        objectName = "plate-{}".format(objIdx)
        matFileName = "plate-{}.mat".format(objIdx)

        # place the object, sense it, and store the result
        plate = rlEnv.Place3DNetObjectAtOrigin(
            directory, objectScale, objectName, True)
        cloud, normals = rlAgent.GetFullCloudAndNormals(
            viewCenter, viewKeepout, viewWorkspace, False)
        point_cloud.SaveMat(matFileName, cloud, normals)

        # visualization (the cloud is always handed to the agent for plotting)
        rlAgent.PlotCloud(cloud)
        if plotImages:
            point_cloud.Plot(cloud, normals, 2)

        # optional pause so each placement can be inspected
        if showSteps:
            raw_input("Placed plate-{}.".format(objIdx))

        # cleanup before the next object
        rlEnv.RemoveObjectSet([plate])
  def __init__(self, rlEnvironment, gpuId, hasHistory, nSamples):
    '''Runs the RlAgent initializer, then stores sampling options, empty-state placeholders,
    value bounds, and the output counts of the network layers.

    - Input rlEnvironment: Forwarded unchanged to RlAgent.__init__.
    - Input gpuId: Forwarded unchanged to RlAgent.__init__.
    - Input hasHistory: Stored as self.hasHistory.
    - Input nSamples: Stored as self.nSamples.
    '''

    RlAgent.__init__(self, rlEnvironment, gpuId)

    # parameters
    self.hasHistory = hasHistory
    self.nSamples = nSamples

    # all-zero placeholders for a state; emptyState bundles the same two arrays
    blankImage = zeros((3, 60, 60), dtype='float')
    blankVector = zeros(6, dtype='float')
    self.emptyStateImage = blankImage
    self.emptyStateVector = blankVector
    self.emptyState = [blankImage, blankVector]

    # value bounds start out unbounded in both directions
    self.minValue = -float('inf')
    self.maxValue = float('inf')

    # network parameters (the "a" and "s" variants of each layer share one size)
    self.conv1aOutputs = self.conv1sOutputs = 12 # LeNet 20 outputs
    self.conv2aOutputs = self.conv2sOutputs = 24 # LeNet 50 outputs
    self.ip1aOutputs = self.ip1sOutputs = 200 # LeNet 500 outputs
    self.ip1Outputs = self.ip2Outputs = 50 # MarcNet 60 outputs
def main():
    '''Entrypoint to the program.

    Steps the hand through every pose in rlAgent.placePoses: the pose index and the pose
    itself are printed, the hand is moved to the pose, and the program waits for the user
    to press [Enter] before continuing with the next pose. Takes no inputs.
    '''

    # PARAMETERS =====================================================================================

    # INITIALIZATION =================================================================================

    # the single positional argument is presumably the showViewer flag -- TODO confirm
    rlEnv = RlEnvironment(True)
    rlAgent = RlAgent(rlEnv)

    # RUN TEST =======================================================================================

    for placeIdx, placePose in enumerate(rlAgent.placePoses):

        print("Showing pose {}: ".format(placeIdx))
        # function-call form with one argument prints the same text under the Python 2
        # print statement and the Python 3 print function
        print(placePose)

        rlAgent.MoveHandToPose(placePose)

        raw_input("Press [Enter] to continue...")

    print("Finished.")
示例#5
0
def Main():
    '''Trains the agent for nEpisodes episodes using the settings stored in "parameters.mat".

    Every episode places objects, runs tMax sense/act/transition steps, hands the collected
    trajectory to the agent as Monte Carlo experience, and logs per-episode statistics. The
    Q-function is updated and saved every trainEvery episodes; statistics (merged with the
    loaded parameters) are written to saveFileName at those episodes and at the last one.
    '''

    # PARAMETERS =====================================================================================

    params = loadmat("parameters.mat", squeeze_me=True)
    (randomSeed, tMax, nEpisodes, trainEvery, unbiasOnEpisode, saveFileName, loadNetwork,
     loadDatabase, showSteps) = [
        params[key] for key in (
            "randomSeed", "tMax", "nEpisodes", "trainEvery", "unbiasOnEpisode",
            "saveFileName", "loadNetwork", "loadDatabase", "showSteps")]

    # INITIALIZATION =================================================================================

    # set random seeds
    seed(randomSeed)
    tensorflow.random.set_seed(randomSeed)

    # initialize agent and environment
    rlEnv = RlEnvironmentBottlesOnCoasters(params)
    rlAgent = RlAgent(params)

    # if testing, load previous results
    if loadNetwork:
        rlAgent.LoadQFunction()
    if loadDatabase:
        rlAgent.LoadExperienceDatabase()

    # RUN TEST =======================================================================================

    # logs; returnLog through sizeLog grow once per episode, epsilonLog once per time step,
    # lossLog once per training update
    returnLog, placedLog, graspedLog = [], [], []
    durationLog, epsilonLog, sizeLog, lossLog = [], [], [], []

    finalEpisode = nEpisodes - 1

    for episode in xrange(nEpisodes):

        tic = time()

        # place random object in random orientation on table
        rlEnv.MoveHandToHoldingPose()
        rlEnv.PlaceObjects(False)
        rlEnv.PlaceObjects(True)
        if showSteps:
            raw_input("Placed objects.")

        # per-episode accumulators
        R, nPlaced, nGrasped = 0, 0, 0
        holdingDesc = None
        observations, actions, rewards, isGrasp = [], [], [], []
        unbias = episode >= unbiasOnEpisode

        for t in xrange(tMax):
            # sense: a step counts as a grasp step when nothing is held before acting
            cloud = rlEnv.GetArtificialCloud()
            handEmpty = holdingDesc is None
            isGrasp.append(handEmpty)

            # act and transition
            o, a, overtDesc, epsilon = rlAgent.SenseAndAct(
                holdingDesc, cloud, t, rlEnv, unbias)
            holdingDesc, r = rlEnv.Transition(overtDesc, cloud)

            # record the step
            timeStepEpsilon = epsilonLog
            timeStepEpsilon.append(epsilon)
            observations.append(o)
            actions.append(a)
            rewards.append(r)
            R += r

            # task success bookkeeping -- number of objects grasped / placed
            if handEmpty:
                if holdingDesc is not None:
                    nGrasped += 1
                if r < 0:
                    nPlaced -= 1
            elif r == 1:
                nPlaced += 1

        rlAgent.AddExperienceMonteCarlo(observations, actions, rewards,
                                        isGrasp)

        # cleanup episode
        rlEnv.ResetEpisode()
        print("Episode {} had return {}".format(episode, R))

        # training happens on the last episode of every trainEvery-sized group
        trainNow = episode % trainEvery == trainEvery - 1
        if trainNow:
            lossLog.append(rlAgent.UpdateQFunctionMonteCarlo())
            rlAgent.SaveQFunction()

        # log results
        returnLog.append(R)
        placedLog.append(nPlaced)
        graspedLog.append(nGrasped)
        durationLog.append(time() - tic)
        sizeLog.append(rlAgent.GetNumberOfExperiences())

        # write results after each training update and after the final episode
        if trainNow or episode == finalEpisode:
            saveData = {
                "episodeReturn": returnLog,
                "nPlacedObjects": placedLog,
                "nGraspedObjects": graspedLog,
                "episodeTime": durationLog,
                "timeStepEpsilon": epsilonLog,
                "databaseSize": sizeLog,
                "losses": lossLog
            }
            saveData.update(params)
            savemat(saveFileName, saveData)

        # backup agent data
        if episode == finalEpisode:
            rlAgent.SaveExperienceDatabase()
示例#6
0
def main(saveFileSuffix):
    '''Entrypoint to the program.

    Alternates between (1) collecting pick-and-place experiences with an epsilon-greedy agent,
    (2) pruning and labeling the experience database, (3) training the agent on it, and
    (4) saving statistics to a .mat file. One pass over these four stages is a "value
    iteration"; nValueIterations passes are run.

    - Input saveFileSuffix: String inserted into the results file name; the file written is
      "results" + saveFileSuffix + ".mat".
    '''

    # PARAMETERS =====================================================================================

    # objects
    objectClass = "mug_train"
    randomScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.03

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

    # grasps
    graspDetectMode = 0  # 0=sample, 1=sample+label
    nGraspSamples = 100
    graspScoreThresh = 350

    # learning
    nValueIterations = 70
    nDataIterations = 50
    nGraspIterations = 20
    pickEpsilon = 1.0
    placeEpsilon = 1.0
    minPickEpsilon = 0.10
    minPlaceEpsilon = 0.10
    pickEpsilonDelta = 0.05
    placeEpsilonDelta = 0.05
    maxExperiences = 25000
    trainingBatchSize = 25000
    # the last 5 value iterations run with epsilon forced to 0 (see the check in the loop)
    unbiasOnIteration = nValueIterations - 5

    # visualization/saving
    saveFileName = "results" + saveFileSuffix + ".mat"
    recordLoss = True
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    nPlaceOptions = len(rlAgent.placePoses)
    experienceDatabase = []

    # RUN TEST =======================================================================================

    # one entry is appended to each of these lists per value iteration
    averageReward = []
    placeActionCounts = []
    trainLosses = []
    testLosses = []
    databaseSize = []
    iterationTime = []

    for valueIterationIdx in xrange(nValueIterations):

        print("Iteration {}. Epsilon pick: {}, place: {}".format(\
          valueIterationIdx, pickEpsilon, placeEpsilon))

        # 1. Collect data for this training iteration.

        iterationStartTime = time.time()
        R = []  # rewards received during this value iteration
        placeCounts = zeros(nPlaceOptions)  # accumulated from the tail of ss[1] below

        # check if it's time to unbias data
        if valueIterationIdx >= unbiasOnIteration:
            maxExperiences = trainingBatchSize  # selects all recent experiences, unbiased
            pickEpsilon = 0  # estimating value function of actual policy
            placeEpsilon = 0  # estimating value function of actual policy

        for dataIterationIdx in xrange(nDataIterations):

            # place random object in random orientation on table
            fullObjName, objScale = threeDNet.GetRandomObjectFromClass(
                objectClass, randomScale)
            objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(
                fullObjName, objScale)

            # move the hand to view position and capture a point cloud
            cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                viewCenter, viewKeepout, viewWorkspace)
            rlAgent.PlotCloud(cloud)

            # detect grasps in the sensory data
            grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
                                          nGraspSamples, graspScoreThresh,
                                          graspDetectMode)
            rlAgent.PlotGrasps(grasps)

            if showSteps:
                raw_input("Acquired grasps.")

            # nothing to pick: undo the placement and plot, then try another object
            if len(grasps) == 0:
                print("No grasps found. Skipping iteration.")
                rlEnv.RemoveObject(objHandle)
                rlAgent.UnplotCloud()
                continue

            # the same object and detected grasps are reused for several pick/place attempts
            for graspIterationIdx in xrange(nGraspIterations):

                print("Episode {}.{}.{}.".format(valueIterationIdx,
                                                 dataIterationIdx,
                                                 graspIterationIdx))

                # perform pick action
                grasp = rlAgent.GetGrasp(grasps, pickEpsilon)
                s = rlEnv.GetState(rlAgent, grasp, None)
                rlAgent.PlotGrasps([grasp])

                # NOTE(review): unlike the other showSteps checks this one prints instead of
                # waiting on raw_input -- confirm whether a pause was intended
                if showSteps:
                    print("Selected grasp.")

                # perform place action
                P = rlAgent.GetPlacePose(grasp, placeEpsilon)
                rlAgent.MoveHandToPose(P)
                ss = rlEnv.GetState(rlAgent, grasp, P)
                rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
                r = rlEnv.RewardBinary(objHandle, targetObjectAxis,
                                       maxAngleFromObjectAxis,
                                       maxObjectTableGap)
                print("The robot receives {} reward.".format(r))

                if showSteps:
                    raw_input("Press [Enter] to continue...")

                # add experience to database
                experienceDatabase.append((s, ss, 0))  # grasp -> placement
                experienceDatabase.append((ss, None, r))  # placement -> end

                # record save data
                R.append(r)
                # adds the trailing entries of ss[1] to the per-place counters; the slice start
                # is derived from len(s[1]), so this presumably relies on s[1] and ss[1] having
                # equal length -- TODO confirm against RlEnvironment.GetState
                placeCounts += ss[1][len(s[1]) - nPlaceOptions:]

                # cleanup this grasp iteration
                rlAgent.UnplotGrasps()
                # put the object back where it was first placed so the next attempt starts
                # from the same pose
                rlEnv.MoveObjectToPose(objHandle, objRandPose)

            # cleanup this data iteration
            rlEnv.RemoveObject(objHandle)
            rlAgent.UnplotCloud()

        # 2. Compute value labels for data.
        experienceDatabase = rlAgent.PruneDatabase(experienceDatabase,
                                                   maxExperiences)
        Dl = rlAgent.DownsampleAndLabelData(\
          experienceDatabase, trainingBatchSize)
        databaseSize.append(len(experienceDatabase))

        # 3. Train network from replay database.
        trainLoss, testLoss = rlAgent.Train(Dl, recordLoss=recordLoss)
        trainLosses.append(trainLoss)
        testLosses.append(testLoss)

        # decay both epsilons, clipped from below at their minimums; once unbiasing is active
        # the values are reset to 0 at the top of the next iteration, so the values saved
        # below are the clipped minimums rather than the 0 actually used
        pickEpsilon -= pickEpsilonDelta
        placeEpsilon -= placeEpsilonDelta
        pickEpsilon = max(minPickEpsilon, pickEpsilon)
        placeEpsilon = max(minPlaceEpsilon, placeEpsilon)

        # 4. Save results
        # the file is rewritten in full after every value iteration
        averageReward.append(mean(R))
        placeActionCounts.append(placeCounts)
        iterationTime.append(time.time() - iterationStartTime)
        saveData = {
            "objectClass": objectClass,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "nValueIterations": nValueIterations,
            "nDataIterations": nDataIterations,
            "nGraspIterations": nGraspIterations,
            "pickEpsilon": pickEpsilon,
            "placeEpsilon": placeEpsilon,
            "minPickEpsilon": minPickEpsilon,
            "minPlaceEpsilon": minPlaceEpsilon,
            "pickEpsilonDelta": pickEpsilonDelta,
            "placeEpsilonDelta": placeEpsilonDelta,
            "maxExperiences": maxExperiences,
            "trainingBatchSize": trainingBatchSize,
            "averageReward": averageReward,
            "placeActionCounts": placeActionCounts,
            "trainLoss": trainLosses,
            "testLoss": testLosses,
            "databaseSize": databaseSize,
            "iterationTime": iterationTime,
            "placePoses": rlAgent.placePoses
        }
        savemat(saveFileName, saveData)
def main():
    '''Entrypoint to the program.

    Trains a pick-and-place agent over nTrainingIterations iterations. Each iteration
    (1) runs nEpisodes object placements, each reused nReuses times for episodes of up to
    maxTimesteps transitions, (2) prunes and labels the experience database, (3) trains the
    agent on it, and (4) rewrites "results.mat" with all statistics gathered so far.

    Statistics of an episode that ends in a grasp-detection failure (return, placement counts,
    place histogram, experiences) are discarded as a group.
    '''

    # PARAMETERS =====================================================================================

    # objects
    objectClass = "mug_train"
    randomScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.03

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

    # grasps
    graspDetectMode = 1  # 0=sample, 1=sample+label
    nGraspSamples = 200
    graspScoreThresh = 300

    # learning
    nTrainingIterations = 100
    nEpisodes = 100
    nReuses = 10
    maxTimesteps = 10
    gamma = 0.98
    epsilon = 1.0
    epsilonDelta = 0.05
    minEpsilon = 0.05
    maxExperiences = 50000
    trainingBatchSize = 50000
    # the last 5 training iterations run with epsilon forced to 0 (see the check in the loop)
    unbiasOnIteration = nTrainingIterations - 5

    # visualization/saving
    saveFileName = "results.mat"
    recordLoss = True
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    nPlaceOptions = len(rlAgent.placePoses)
    experienceDatabase = []

    # RUN TEST =======================================================================================

    # one entry is appended to each of these lists per training iteration
    avgReturn = []
    avgGraspsDetected = []
    avgTopGraspsDetected = []
    placeHistograms = []
    avgGoodTempPlaceCount = []
    avgBadTempPlaceCount = []
    avgGoodFinalPlaceCount = []
    avgBadFinalPlaceCount = []
    trainLosses = []
    testLosses = []
    databaseSize = []
    iterationTime = []

    for trainingIteration in xrange(nTrainingIterations):

        # initialization
        iterationStartTime = time.time()
        print("Iteration: {}, Epsilon: {}".format(trainingIteration, epsilon))

        # per-iteration accumulators, averaged in step 4 below
        placeHistogram = zeros(nPlaceOptions)
        Return = []
        graspsDetected = []
        topGraspsDetected = []
        goodTempPlaceCount = []
        badTempPlaceCount = []
        goodFinalPlaceCount = []
        badFinalPlaceCount = []

        # check if it's time to unbias data
        if trainingIteration >= unbiasOnIteration:
            maxExperiences = trainingBatchSize  # selects all recent experiences, unbiased
            epsilon = 0  # estimating value function of actual policy

        # for each episode/object placement
        for episode in xrange(nEpisodes):

            # place random object in random orientation on table
            fullObjName, objScale = threeDNet.GetRandomObjectFromClass(
                objectClass, randomScale)
            objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(
                fullObjName, objScale)

            # move the hand to view position(s) and capture a point cloud
            cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                viewCenter, viewKeepout, viewWorkspace)

            # detect grasps in the sensor data
            grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
                                          nGraspSamples, graspScoreThresh,
                                          graspDetectMode)
            # kept so every reuse of this object starts from the initial detections
            graspsStart = grasps

            graspsDetected.append(len(grasps))
            topGraspsCount = CountObjectTopGrasps(grasps, objRandPose,
                                                  maxAngleFromObjectAxis)
            # record the top-grasp count of the initial detection too, mirroring what is
            # done after every re-detection below; previously this value was dropped
            topGraspsDetected.append(topGraspsCount)

            if len(grasps) == 0:
                print("No grasps found. Skipping iteration.")
                rlEnv.RemoveObject(objHandle)
                continue

            rlAgent.PlotCloud(cloud)
            rlAgent.PlotGrasps(grasps)

            for reuse in xrange(nReuses):

                print("Episode {}.{}.{}.".format(trainingIteration, episode,
                                                 reuse))

                if showSteps:
                    raw_input(
                        "Beginning of episode. Press [Enter] to continue...")

                # initialize recording variables
                episodePlaceHistogram = zeros(nPlaceOptions)
                episodeReturn = 0
                episodeGoodTempPlaceCount = 0
                episodeBadTempPlaceCount = 0
                episodeGoodFinalPlaceCount = 0
                episodeBadFinalPlaceCount = 0
                graspDetectionFailure = False
                episodeExperiences = []

                # initial state and first action
                s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
                a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
                rlAgent.PlotGrasps([grasp])

                # for each time step in the episode
                for t in xrange(maxTimesteps):

                    ss, selectedGrasp, rr = rlEnv.Transition(
                        rlAgent, objHandle, s, selectedGrasp, a, grasp, place,
                        targetObjectAxis, maxAngleFromObjectAxis,
                        maxObjectTableGap)
                    # placement flags read from fixed positions of the state vector ss[1]
                    ssIsPlacedTempGood = ss[1][1]
                    ssIsPlacedTempBad = ss[1][2]
                    ssIsPlacedFinalGood = ss[1][3]
                    ssIsPlacedFinalBad = ss[1][4]

                    if showSteps:
                        raw_input(
                            "Transition {}. Press [Enter] to continue...".
                            format(t))

                    # re-detect only if a non-terminal placement just happened
                    if ssIsPlacedTempGood and place is not None:
                        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                            viewCenter, viewKeepout, viewWorkspace)
                        grasps = rlAgent.DetectGrasps(cloud, viewPoints,
                                                      viewPointIndices,
                                                      nGraspSamples,
                                                      graspScoreThresh,
                                                      graspDetectMode)
                        graspsDetected.append(len(grasps))
                        topGraspsCount = CountObjectTopGrasps(
                            grasps, rlEnv.GetObjectPose(objHandle),
                            maxAngleFromObjectAxis)
                        topGraspsDetected.append(topGraspsCount)
                        if len(grasps) == 0:
                            print("Grasp detection failure.")
                            graspDetectionFailure = True
                            break
                        rlAgent.PlotCloud(cloud)
                        rlAgent.PlotGrasps(grasps)

                    # get next action
                    aa, ggrasp, pplace = rlAgent.ChooseAction(
                        ss, grasps, epsilon)
                    if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])

                    if showSteps:
                        raw_input(
                            "Action {}. Press [Enter] to continue...".format(
                                t))

                    # add to database and record data
                    episodeExperiences.append((s, a, rr, ss, aa))
                    episodeReturn += (gamma**t) * rr
                    if place is not None:
                        episodeGoodTempPlaceCount += ssIsPlacedTempGood
                        episodeBadTempPlaceCount += ssIsPlacedTempBad
                        episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
                        episodeBadFinalPlaceCount += ssIsPlacedFinalBad
                        # accumulate per episode; merged into placeHistogram only when the
                        # episode is kept (previously added to placeHistogram directly, so
                        # discarded episodes were counted and this buffer stayed all zeros)
                        episodePlaceHistogram += a[1]

                    # prepare for next time step
                    if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad:
                        break
                    s = ss
                    a = aa
                    grasp = ggrasp
                    place = pplace

                # cleanup this reuse
                # everything recorded for the episode is committed together, and only if
                # grasp detection never failed during it
                if not graspDetectionFailure:
                    experienceDatabase += episodeExperiences
                    placeHistogram += episodePlaceHistogram
                    Return.append(episodeReturn)
                    goodTempPlaceCount.append(episodeGoodTempPlaceCount)
                    badTempPlaceCount.append(episodeBadTempPlaceCount)
                    goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
                    badFinalPlaceCount.append(episodeBadFinalPlaceCount)
                # restore the object pose and the initial grasp detections for the next reuse
                rlEnv.MoveObjectToPose(objHandle, objRandPose)
                grasps = graspsStart

            # cleanup this episode
            rlEnv.RemoveObject(objHandle)
            rlAgent.UnplotGrasps()
            rlAgent.UnplotCloud()

        # 2. Compute value labels for data.
        experienceDatabase = rlAgent.PruneDatabase(experienceDatabase,
                                                   maxExperiences)
        Dl = rlAgent.DownsampleAndLabelData(experienceDatabase,
                                            trainingBatchSize, gamma)
        databaseSize.append(len(experienceDatabase))

        # 3. Train network from replay database.
        trainLoss, testLoss = rlAgent.Train(Dl, recordLoss=recordLoss)
        trainLosses.append(trainLoss)
        testLosses.append(testLoss)

        # decay epsilon, clipped from below at minEpsilon
        epsilon -= epsilonDelta
        epsilon = max(minEpsilon, epsilon)

        # 4. Save results
        # the file is rewritten in full after every training iteration
        avgReturn.append(mean(Return))
        avgGraspsDetected.append(mean(graspsDetected))
        avgTopGraspsDetected.append(mean(topGraspsDetected))
        placeHistograms.append(placeHistogram)
        avgGoodTempPlaceCount.append(mean(goodTempPlaceCount))
        avgBadTempPlaceCount.append(mean(badTempPlaceCount))
        avgGoodFinalPlaceCount.append(mean(goodFinalPlaceCount))
        avgBadFinalPlaceCount.append(mean(badFinalPlaceCount))
        iterationTime.append(time.time() - iterationStartTime)
        saveData = {
            "objectClass": objectClass,
            "randomScale": randomScale,
            "maxAngleFromObjectAxis": maxAngleFromObjectAxis,
            "maxObjectTableGap": maxObjectTableGap,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "graspDetectMode": graspDetectMode,
            "nTrainingIterations": nTrainingIterations,
            "nEpisodes": nEpisodes,
            "maxTimesteps": maxTimesteps,
            "gamma": gamma,
            "epsilon": epsilon,
            "minEpsilon": minEpsilon,
            "epsilonDelta": epsilonDelta,
            "maxExperiences": maxExperiences,
            "trainingBatchSize": trainingBatchSize,
            "avgReturn": avgReturn,
            "avgGraspsDetected": avgGraspsDetected,
            "avgTopGraspsDetected": avgTopGraspsDetected,
            "placeHistograms": placeHistograms,
            "avgGoodTempPlaceCount": avgGoodTempPlaceCount,
            "avgBadTempPlaceCount": avgBadTempPlaceCount,
            "avgGoodFinalPlaceCount": avgGoodFinalPlaceCount,
            "avgBadFinalPlaceCount": avgBadFinalPlaceCount,
            "trainLoss": trainLosses,
            "testLoss": testLosses,
            "databaseSize": databaseSize,
            "iterationTime": iterationTime,
            "placePoses": rlAgent.placePoses
        }
        savemat(saveFileName, saveData)
    time.sleep(2)
    print("Before 1,000 iterations have passed,\nyou will notice that the agent is getting better\nas it needs fewer steps to complete a corse.")
    time.sleep(5)
    print("When it has reached 5,000 iterations, you\nget to see exactly how it acts.")
    time.sleep(5)

if __name__ == '__main__':
    # Script entry point: builds an Environment and an RlAgent from the settings below and
    # then loops forever, creating a fresh Environment for every pass of the outer loop.
    # `size` is used for both Environment arguments and for the first two RlAgent arguments.
    size = 10
    epsilon = .5
    bufferSize = 1000000
    syncRate = 1000
    batchSize = 32
    discount = .98
    # switched to True once the counter i equals 5000 (see the schedule below)
    graphics = False
    environment = Environment(size,size)
    agent = RlAgent(size,size,epsilon,bufferSize,syncRate,batchSize,discount)
    i = 0
    while True:
        # epsilon schedule keyed on the counter i: .1 at 200, .05 at 1000, .02 at 5000
        if i ==200:
            agent.epsilon = .1
        elif i == 1000:
            agent.epsilon = .05
        elif i == 5000:
            agent.epsilon = .02
            graphics = True
        steps = 0
        environment = Environment(size,size)
        # the inner loop runs until the environment reports a state of None
        # NOTE(review): the lines visible here neither advance environment.state nor increment
        # i; the rest of the loop body appears to be missing from this excerpt -- confirm
        # against the complete script
        while environment.state is not None:
            steps+=1
            state1 = environment.state.copy()
示例#9
0
def main(objectClass, epsilon):
  '''Runs single pick-and-place attempts on random objects of one class and saves the rewards.
    - Input objectClass: Folder in 3D Net database.
    - Input epsilon: Forwarded to the agent's grasp and place-pose selection. Network weights
      are loaded from weightsFileName only when epsilon is below 1.0.
  '''

  # PARAMETERS =====================================================================================

  # objects
  randomScale = True
  targetObjectAxis = array([0,0,1])
  maxAngleFromObjectAxis = 20*(pi/180)
  maxObjectTableGap = 0.02

  # view
  viewCenter = array([0,0,0])
  viewKeepout = 0.50
  viewWorkspace = [(-1,1),(-1,1),(-1,1)]

  # grasps
  graspDetectMode = 1 # 0=sample, 1=sample+label
  nGraspSamples = 200
  graspScoreThresh = 300
  nGraspInliers = 3

  # learning
  weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
  nDataIterations = 300

  # visualization/saving
  showViewer = False
  showEveryStep = False
  saveFileName = "".join(
    ["results-single-", objectClass, "-epsilon", str(epsilon), ".mat"])

  def pause():
    '''Blocks on user input when step-by-step mode is enabled.'''
    if showEveryStep:
      raw_input("Press [Enter] to continue...")

  # INITIALIZATION =================================================================================

  threeDNet = ThreeDNet()
  rlEnv = RlEnvironment(showViewer)
  rlAgent = RlAgent(rlEnv)
  if epsilon < 1.0:
    rlAgent.LoadNetworkWeights(weightsFileName)
  Return = []

  # Everything written to the results file. The "Return" entry refers to the list that is
  # appended to inside the loop, so each save contains all rewards collected so far.
  saveData = {
    "randomScale":randomScale,
    "targetObjectAxis":targetObjectAxis,
    "maxAngleFromObjectAxis":maxAngleFromObjectAxis,
    "maxObjectTableGap":maxObjectTableGap,
    "graspDetectMode":graspDetectMode,
    "nGraspSamples":nGraspSamples,
    "graspScoreThresh":graspScoreThresh,
    "weightsFileName":weightsFileName,
    "nDataIterations":nDataIterations,
    "epsilon":epsilon,
    "Return":Return}

  # RUN TEST =======================================================================================

  for dataIterationIdx in xrange(nDataIterations):

    print("Iteration {}.".format(dataIterationIdx))

    # drop a randomly chosen object of the class onto the table in a random orientation
    fullObjName, objScale = threeDNet.GetRandomObjectFromClass(objectClass, randomScale)
    objHandle, _ = rlEnv.PlaceObjectRandomOrientation(fullObjName, objScale)

    # sense: acquire a point cloud from the view position
    cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
      viewCenter, viewKeepout, viewWorkspace)
    rlAgent.PlotCloud(cloud)

    # find grasp candidates in the cloud
    grasps = rlAgent.DetectGrasps(
      cloud, viewPoints, viewPointIndices, nGraspSamples, graspScoreThresh, nGraspInliers,
      graspDetectMode)
    rlAgent.PlotGrasps(grasps)

    # without candidates there is nothing to attempt; clean up and move on without saving
    if len(grasps) == 0:
      print("No grasps found. Skipping iteration.")
      rlEnv.RemoveObject(objHandle)
      rlAgent.UnplotCloud()
      continue

    pause()

    # pick
    grasp = rlAgent.GetGrasp(grasps, epsilon)
    rlAgent.PlotGrasps([grasp])

    pause()

    # place, then score the outcome
    P = rlAgent.GetPlacePose(grasp, epsilon)
    rlAgent.MoveHandToPose(P)
    rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
    r = rlEnv.RewardHeightExponential(
      objHandle, targetObjectAxis, maxAngleFromObjectAxis, maxObjectTableGap)
    print("The robot receives {} reward.".format(r))
    Return.append(r)

    pause()

    # remove the object and clear the plots for the next iteration
    rlEnv.RemoveObject(objHandle)
    rlAgent.UnplotGrasps()
    rlAgent.UnplotCloud()

    # rewrite the results file after every completed attempt
    savemat(saveFileName, saveData)
示例#10
0
def Main():
    '''Run the SARSA pegs-on-disks experiment configured by "parameters.mat".

    Reads the run settings, seeds the random number generators, creates the
    environment and the agent, and then runs nEpisodes episodes of tMax steps.
    Each step's experience is passed to the agent one step late (once the
    following observation/action pair is known). The Q-function is updated and
    saved every trainEvery episodes, and the logged statistics are written to
    saveFileName together with the loaded parameters.
    '''

    # PARAMETERS =====================================================================================

    # every run setting comes from the parameter file in the working directory
    params = loadmat("parameters.mat", squeeze_me=True)
    randomSeed = params["randomSeed"]
    tMax = params["tMax"]
    nEpisodes = params["nEpisodes"]
    trainEvery = params["trainEvery"]
    unbiasOnEpisode = params["unbiasOnEpisode"]
    saveFileName = params["saveFileName"]
    loadNetwork = params["loadNetwork"]
    loadDatabase = params["loadDatabase"]
    showSteps = params["showSteps"]

    # INITIALIZATION =================================================================================

    # seed the random number generators
    seed(randomSeed)
    tensorflow.random.set_seed(randomSeed)

    # build the environment first, then the agent
    rlEnv = RlEnvironmentPegsOnDisks(params)
    rlAgent = RlAgent(params)

    # optionally resume from previously saved agent state
    if loadNetwork:
        rlAgent.LoadQFunction()
    if loadDatabase:
        rlAgent.LoadExperienceDatabase()

    # RUN TEST =======================================================================================

    # per-episode logs (epsilonLog is per time step)
    returnLog = []
    placedLog = []
    durationLog = []
    epsilonLog = []
    databaseSizeLog = []
    lossLog = []

    for episode in xrange(nEpisodes):

        tic = time()

        # set up the scene: hand to holding pose, then both object sets
        rlEnv.MoveHandToHoldingPose()
        rlEnv.PlaceObjects(False)
        rlEnv.PlaceObjects(True)
        if showSteps:
            raw_input("Placed objects.")

        episodeTotal = 0
        placedCount = 0
        holdingDesc = None
        prevObs = None
        prevAct = None
        prevReward = None

        for t in xrange(tMax):

            # sense; a step is a grasp step when nothing is held beforehand
            cloud = rlEnv.GetArtificialCloud()
            handWasEmpty = holdingDesc is None

            # choose the next action and apply it to the environment
            obs, act, overtDesc, epsilon = rlAgent.SenseAndAct(
                holdingDesc, cloud, t, rlEnv, episode >= unbiasOnEpisode)
            holdingDesc, reward = rlEnv.Transition(overtDesc, cloud)

            # the previous step's experience can be stored now that its
            # successor observation/action is known
            if t > 0:
                rlAgent.AddExperienceSarsa(
                    prevObs, prevAct, prevReward, obs, act)
            prevObs, prevAct, prevReward = obs, act, reward

            # bookkeeping: positive reward on a non-grasp step counts one
            # object up, negative reward on a grasp step counts one down
            episodeTotal += reward
            if not handWasEmpty:
                if reward > 0:
                    placedCount += 1
            elif reward < 0:
                placedCount -= 1
            epsilonLog.append(epsilon)

        # the last step has no successor, so it is stored with all-None
        # next observation and next action
        rlAgent.AddExperienceSarsa(
            prevObs, prevAct, prevReward,
            [None] * rlAgent.nLevels, [None] * rlAgent.nLevels)

        # cleanup episode
        rlEnv.ResetEpisode()
        print("Episode {} had return {}".format(episode, episodeTotal))

        # train on the last episode of every block of trainEvery episodes
        isTrainEpisode = episode % trainEvery == trainEvery - 1
        if isTrainEpisode:
            lossLog.append(rlAgent.UpdateQFunctionSarsa())
            rlAgent.SaveQFunction()

        # record this episode
        returnLog.append(episodeTotal)
        placedLog.append(placedCount)
        durationLog.append(time() - tic)
        databaseSizeLog.append(rlAgent.GetNumberOfExperiences())

        # write results after every training episode and after the last one
        isLastEpisode = episode == nEpisodes - 1
        if isTrainEpisode or isLastEpisode:
            saveData = dict(
                episodeReturn=returnLog,
                nPlacedObjects=placedLog,
                episodeTime=durationLog,
                timeStepEpsilon=epsilonLog,
                databaseSize=databaseSizeLog,
                losses=lossLog)
            saveData.update(params)
            savemat(saveFileName, saveData)

        # backup agent data once the run is complete
        if isLastEpisode:
            rlAgent.SaveExperienceDatabase()
示例#11
0
def main(objectClass, epsilon):
    '''Entrypoint to the program.

    Runs nDataIterations single-step pick-and-place trials in clutter and saves
    the received rewards. Each trial places nObjects objects of the given class,
    captures a point cloud, detects grasps, lets the agent choose a grasp and a
    place pose, and scores the result with RewardHeightExponential.

    - Input objectClass: Object class name passed to ThreeDNet.GetRandomObjectSet;
      also embedded in the results file name.
    - Input epsilon: Value passed to the agent's GetGrasp and GetPlacePose, and
      embedded in the results file name. Network weights are loaded only when it
      is below 1.0 (presumably an exploration rate -- TODO confirm).
    '''

    # PARAMETERS =====================================================================================

    # objects
    nObjects = 7
    randomObjectScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.02

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]
    objViewWorkspace = [(-1, 1), (-1, 1), (0.002, 1)]

    # grasps
    graspDetectMode = 1  # 0=sample, 1=sample+label
    nGraspSamples = 500
    graspScoreThresh = 300
    nGraspInliers = 2

    # testing
    weightsFileName = "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/image_iter_5000.caffemodel"
    nDataIterations = 300

    # visualization/saving
    saveFileName = "results-clutter-" + objectClass + "-epsilon" + str(
        epsilon) + ".mat"
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    # weights are only loaded when epsilon is below 1.0
    if epsilon < 1.0: rlAgent.LoadNetworkWeights(weightsFileName)
    Return = []

    # RUN TEST =======================================================================================

    for dataIterationIdx in xrange(nDataIterations):

        print("Iteration {}.".format(dataIterationIdx))

        # place clutter on table (the returned poses are not needed here)
        fullObjNames, objScales = threeDNet.GetRandomObjectSet(
            objectClass, nObjects, randomObjectScale)
        objHandles, _ = rlEnv.PlaceObjectSet(fullObjNames, objScales)

        # per-object points, used below to filter grasps and to identify the
        # object that the selected grasp belongs to
        objCloud, objCloudIdxs = rlEnv.AssignPointsToObjects(
            rlAgent, objHandles, viewCenter, viewKeepout, objViewWorkspace)

        if showSteps:
            raw_input("Objects placed.")

        # move the hand to view position and capture a point cloud
        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
            viewCenter, viewKeepout, viewWorkspace)
        rlAgent.PlotCloud(cloud)

        if showSteps:
            raw_input("Point cloud.")

        # detect grasps in the sensory data
        graspsDetected = rlAgent.DetectGrasps(cloud, viewPoints,
                                              viewPointIndices, nGraspSamples,
                                              graspScoreThresh, nGraspInliers,
                                              graspDetectMode)
        grasps = rlAgent.FilterGraspsWithNoPoints(graspsDetected, objCloud)
        if len(graspsDetected) > len(grasps):
            print("Filtered {} empty grasps.".format(
                len(graspsDetected) - len(grasps)))
        rlAgent.PlotGrasps(grasps)

        if showSteps:
            raw_input("Acquired grasps.")

        # nothing to pick: undo this iteration's scene and plots, try again
        if len(grasps) == 0:
            print("No grasps found. Skipping iteration.")
            rlEnv.RemoveObjectSet(objHandles)
            rlAgent.UnplotGrasps()
            rlAgent.UnplotCloud()
            continue

        # perform pick action
        grasp = rlAgent.GetGrasp(grasps, epsilon)
        rlAgent.PlotGrasps([grasp])

        if showSteps:
            raw_input("Selected grasp.")

        # perform place action
        P = rlAgent.GetPlacePose(grasp, epsilon)
        rlAgent.MoveHandToPose(P)
        # the object to move is the one selected by GetObjectWithMaxGraspPoints
        objHandle, _ = rlEnv.GetObjectWithMaxGraspPoints(
            grasp, objHandles, objCloud, objCloudIdxs)
        rlAgent.MoveObjectToHandAtGrasp(grasp, objHandle)
        r = rlEnv.RewardHeightExponential(objHandle, targetObjectAxis,
                                          maxAngleFromObjectAxis,
                                          maxObjectTableGap)
        print("The robot receives {} reward.".format(r))
        Return.append(r)

        if showSteps:
            raw_input("Press [Enter] to continue...")

        # cleanup this data iteration
        rlEnv.RemoveObjectSet(objHandles)
        rlAgent.UnplotGrasps()
        rlAgent.UnplotCloud()

        # Save results (rewritten after every completed iteration)
        saveData = {
            "nObjects": nObjects,
            "randomObjectScale": randomObjectScale,
            "targetObjectAxis": targetObjectAxis,
            "maxAngleFromObjectAxis": maxAngleFromObjectAxis,
            "maxObjectTableGap": maxObjectTableGap,
            "graspDetectMode": graspDetectMode,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "weightsFileName": weightsFileName,
            "nDataIterations": nDataIterations,
            "epsilon": epsilon,
            "Return": Return
        }
        savemat(saveFileName, saveData)
示例#12
0
def main():
    '''Entrypoint to the program.

    Evaluates the highest-scoring grasp returned by GraspProxyMatlab on scenes
    of nObjects objects loaded from objectFolder. For each of nEpisodes scenes
    the best descriptor is tested with RlEnvironmentGrasping.TestGrasp, and the
    two results it returns are logged under "antipodal" and "perfect". When no
    grasp is detected both are logged as 0.0. Results are saved to saveFileName
    after every episode.
    '''

    # PARAMETERS =====================================================================================

    # system
    gpuId = 0

    # objects
    nObjects = 10
    objectFolder = "/home/mgualti/Data/RaveObjects/RectangularBlocks"

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.70
    viewWorkspace = [(-1.0, 1.0), (-1.0, 1.0), (-1.0, 1.0)]

    # learning
    nEpisodes = 1000
    gpdScoreThresh = -float('Inf')
    gpdNSamples = 500

    # visualization/saving
    saveFileName = "results.mat"
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    rlEnv = RlEnvironmentGrasping(showViewer)
    rlAgent = RlAgent(rlEnv, gpuId)
    gpd = GraspProxyMatlab()

    # RUN TEST =======================================================================================

    antipodal = []
    perfect = []
    episodeTime = []

    for episode in xrange(nEpisodes):

        # Initialization
        episodeStartTime = time.time()

        # place random object in random orientation on table
        objHandles = rlEnv.PlaceObjects(nObjects, objectFolder)
        if showSteps: raw_input("Placed objects.")

        # the second return value (cloudTree) is not needed here
        cloud, _, viewPoints, viewPointIndices = rlAgent.GetDualCloudAndViewPoints(
            viewCenter, viewKeepout, viewWorkspace)
        rlAgent.PlotCloud(cloud)
        if showSteps: raw_input("Acquired point cloud.")

        #SaveCloud(cloud, viewPoints, viewPointIndices, "blocks1.mat")

        # call gpd
        descriptors = gpd.DetectGrasps(cloud, viewPoints, viewPointIndices,
                                       gpdNSamples, gpdScoreThresh, gpuId)

        if len(descriptors) > 0:
            # choose descriptor with max score; max() returns the first of
            # equally scored descriptors and always yields one from this scene
            desc = max(descriptors, key=lambda d: d.score)

            rlAgent.PlotDescriptors([desc])

            # check grasp and finish
            ant, antAndCf = rlEnv.TestGrasp(desc, rlAgent, objHandles)
        else:
            # no grasp counts as a failure on both measures; set the
            # per-episode values only, never the result lists themselves
            print("No grasps found!")
            ant = 0.0
            antAndCf = 0.0

        # cleanup this scene
        print("Episode {}, antipodal={}, antipodal+collisionFree={}".format(
            episode, ant, antAndCf))
        antipodal.append(ant)
        perfect.append(antAndCf)
        rlEnv.RemoveObjectSet(objHandles)

        # Save results (rewritten after every episode)
        episodeTime.append(time.time() - episodeStartTime)
        saveData = {
            "gpuId": gpuId,
            "nObjects": nObjects,
            "objectFolder": objectFolder,
            "viewCenter": viewCenter,
            "viewKeepout": viewKeepout,
            "viewWorkspace": viewWorkspace,
            "nEpisodes": nEpisodes,
            "gpdScoreThresh": gpdScoreThresh,
            "gpdNSamples": gpdNSamples,
            "episodeTime": episodeTime,
            "antipodal": antipodal,
            "perfect": perfect
        }
        savemat(saveFileName, saveData)
示例#13
0
def main(objectClass):
    '''Entrypoint to the program.

    Runs nEpisodes multi-step pick-and-place episodes on single objects of the
    given class using previously trained network weights, and saves per-episode
    statistics to "results-<objectClass>.mat" after every completed episode.

    An episode places one object, captures a point cloud, detects grasps and
    then alternates agent actions and environment transitions for at most
    maxTimesteps steps. Grasps are re-detected after a good temporary
    placement. Episodes in which that re-detection finds no grasps are excluded
    from the return, placement-count and placement-histogram statistics.

    - Input objectClass: Object class name passed to
      ThreeDNet.GetRandomObjectFromClass; also embedded in the results file name.
    '''

    # PARAMETERS =====================================================================================

    # objects
    randomScale = True
    targetObjectAxis = array([0, 0, 1])
    maxAngleFromObjectAxis = 20 * (pi / 180)
    maxObjectTableGap = 0.03

    # view
    viewCenter = array([0, 0, 0])
    viewKeepout = 0.50
    viewWorkspace = [(-1, 1), (-1, 1), (-1, 1)]

    # grasps
    graspDetectMode = 1  # 0=sample, 1=sample+label
    nGraspSamples = 200
    graspScoreThresh = 300

    # testing
    nEpisodes = 300
    maxTimesteps = 10
    gamma = 0.98
    epsilon = 0.0
    weightsFileName = \
      "/home/mgualti/mgualti/PickAndPlace/simulation/caffe/dualImage_iter_5000.caffemodel"

    # visualization/saving
    saveFileName = "results-" + objectClass + ".mat"
    showViewer = False
    showSteps = False

    # INITIALIZATION =================================================================================

    threeDNet = ThreeDNet()
    rlEnv = RlEnvironment(showViewer)
    rlAgent = RlAgent(rlEnv)
    rlAgent.LoadNetworkWeights(weightsFileName)
    nPlaceOptions = len(rlAgent.placePoses)

    placeHistogram = zeros(nPlaceOptions)
    Return = []
    graspsDetected = []
    topGraspsDetected = []
    goodTempPlaceCount = []
    badTempPlaceCount = []
    goodFinalPlaceCount = []
    badFinalPlaceCount = []

    # RUN TEST =======================================================================================

    # for each episode/object placement
    for episode in xrange(nEpisodes):

        # place random object in random orientation on table
        fullObjName, objScale = threeDNet.GetRandomObjectFromClass(
            objectClass, randomScale)
        objHandle, objRandPose = rlEnv.PlaceObjectRandomOrientation(
            fullObjName, objScale)
        rlAgent.MoveSensorToPose(
            rlAgent.GetStandardViewPose(viewCenter, viewKeepout))

        if showSteps:
            raw_input("Beginning of episode. Press [Enter] to continue...")

        # move the hand to view position(s) and capture a point cloud
        cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
            viewCenter, viewKeepout, viewWorkspace)
        rlAgent.PlotCloud(cloud)

        if showSteps:
            raw_input("Acquired point cloud. Press [Enter] to continue...")

        # detect grasps in the sensor data
        grasps = rlAgent.DetectGrasps(cloud, viewPoints, viewPointIndices,
                                      nGraspSamples, graspScoreThresh,
                                      graspDetectMode)

        # log both counts for every detection so the two lists stay parallel
        graspsDetected.append(len(grasps))
        topGraspsCount = CountObjectTopGrasps(grasps, objRandPose,
                                              maxAngleFromObjectAxis)
        topGraspsDetected.append(topGraspsCount)

        if len(grasps) == 0:
            print("No grasps found. Skipping iteration.")
            rlEnv.RemoveObject(objHandle)
            # the cloud was already plotted above; remove it like the normal
            # end-of-episode cleanup does
            rlAgent.UnplotCloud()
            continue

        rlAgent.PlotGrasps(grasps)

        print("Episode {}.".format(episode))

        if showSteps:
            raw_input("Acquired grasps. Press [Enter] to continue...")

        # initialize recording variables
        episodePlaceHistogram = zeros(nPlaceOptions)
        episodeReturn = 0
        episodeGoodTempPlaceCount = 0
        episodeBadTempPlaceCount = 0
        episodeGoodFinalPlaceCount = 0
        episodeBadFinalPlaceCount = 0
        graspDetectionFailure = False

        # initial state and first action
        s, selectedGrasp = rlEnv.GetInitialState(rlAgent)
        a, grasp, place = rlAgent.ChooseAction(s, grasps, epsilon)
        rlAgent.PlotGrasps([grasp])

        # for each time step in the episode
        for t in xrange(maxTimesteps):

            ss, selectedGrasp, rr = rlEnv.Transition(rlAgent, objHandle, s,
                                                     selectedGrasp, a, grasp,
                                                     place, targetObjectAxis,
                                                     maxAngleFromObjectAxis,
                                                     maxObjectTableGap)
            # placement outcome flags carried in the next state
            ssIsPlacedTempGood = ss[1][1]
            ssIsPlacedTempBad = ss[1][2]
            ssIsPlacedFinalGood = ss[1][3]
            ssIsPlacedFinalBad = ss[1][4]

            if showSteps:
                raw_input(
                    "Transition {}. Press [Enter] to continue...".format(t))

            # re-detect only if a non-terminal placement just happened
            if ssIsPlacedTempGood and place is not None:
                cloud, viewPoints, viewPointIndices = rlAgent.GetDualCloud(
                    viewCenter, viewKeepout, viewWorkspace)
                rlAgent.UnplotGrasps()
                rlAgent.PlotCloud(cloud)
                if showSteps:
                    raw_input("Acquired cloud. Press [Enter] to continue...")
                grasps = rlAgent.DetectGrasps(cloud, viewPoints,
                                              viewPointIndices, nGraspSamples,
                                              graspScoreThresh,
                                              graspDetectMode)
                graspsDetected.append(len(grasps))
                topGraspsCount = CountObjectTopGrasps(grasps, objRandPose,
                                                      maxAngleFromObjectAxis)
                topGraspsDetected.append(topGraspsCount)
                if len(grasps) == 0:
                    # without grasps the episode cannot continue; mark it so
                    # its statistics are discarded below
                    print("Grasp detection failure.")
                    graspDetectionFailure = True
                    break
                rlAgent.PlotGrasps(grasps)
                if showSteps:
                    raw_input("Acquired grasps. Press [Enter] to continue...")

            # get next action
            aa, ggrasp, pplace = rlAgent.ChooseAction(ss, grasps, epsilon)
            if ggrasp is not None: rlAgent.PlotGrasps([ggrasp])

            if showSteps:
                raw_input("Action {}. Press [Enter] to continue...".format(t))

            # record data from transition
            episodeReturn += (gamma**t) * rr
            if place is not None:
                episodeGoodTempPlaceCount += ssIsPlacedTempGood
                episodeBadTempPlaceCount += ssIsPlacedTempBad
                episodeGoodFinalPlaceCount += ssIsPlacedFinalGood
                episodeBadFinalPlaceCount += ssIsPlacedFinalBad
                # accumulate per episode; merged into placeHistogram only if
                # the episode completes without a grasp detection failure
                episodePlaceHistogram += a[1]

            # prepare for next time step
            if ssIsPlacedTempBad or ssIsPlacedFinalGood or ssIsPlacedFinalBad:
                break
            s = ss
            a = aa
            grasp = ggrasp
            place = pplace

        # commit this episode's statistics unless grasp re-detection failed
        if not graspDetectionFailure:
            placeHistogram += episodePlaceHistogram
            Return.append(episodeReturn)
            goodTempPlaceCount.append(episodeGoodTempPlaceCount)
            badTempPlaceCount.append(episodeBadTempPlaceCount)
            goodFinalPlaceCount.append(episodeGoodFinalPlaceCount)
            badFinalPlaceCount.append(episodeBadFinalPlaceCount)

        # cleanup this episode
        rlEnv.RemoveObject(objHandle)
        rlAgent.UnplotGrasps()
        rlAgent.UnplotCloud()

        # Save results (rewritten after every completed episode)
        saveData = {
            "objectClass": objectClass,
            "randomScale": randomScale,
            "maxAngleFromObjectAxis": maxAngleFromObjectAxis,
            "maxObjectTableGap": maxObjectTableGap,
            "nGraspSamples": nGraspSamples,
            "graspScoreThresh": graspScoreThresh,
            "graspDetectMode": graspDetectMode,
            "nEpisodes": nEpisodes,
            "maxTimesteps": maxTimesteps,
            "gamma": gamma,
            "epsilon": epsilon,
            "Return": Return,
            "graspsDetected": graspsDetected,
            "topGraspsDetected": topGraspsDetected,
            "placeHistogram": placeHistogram,
            "goodTempPlaceCount": goodTempPlaceCount,
            "badTempPlaceCount": badTempPlaceCount,
            "goodFinalPlaceCount": goodFinalPlaceCount,
            "badFinalPlaceCount": badFinalPlaceCount
        }
        savemat(saveFileName, saveData)
示例#14
0
def Main():
    '''Run the Monte Carlo cylinder-placing experiment configured by "parameters.mat".

    Reads the run settings, seeds the random number generators, creates the
    environment and the agent, and then runs nEpisodes episodes of tMax steps.
    Each episode's observations, actions and rewards are handed to the agent as
    one batch once the episode ends. The Q-function is updated and saved every
    trainEvery episodes, and the logged statistics are written to saveFileName
    together with the loaded parameters.
    '''

    # PARAMETERS =====================================================================================

    # every run setting comes from the parameter file in the working directory
    params = loadmat("parameters.mat", squeeze_me=True)
    randomSeed = params["randomSeed"]
    tMax = params["tMax"]
    nEpisodes = params["nEpisodes"]
    trainEvery = params["trainEvery"]
    unbiasOnEpisode = params["unbiasOnEpisode"]
    nObjects = params["nObjects"]
    nSurfaceObjects = params["nSurfaceObjects"]
    objHeight = params["objHeight"]
    objRadius = params["objRadius"]
    surfObjHeight = params["surfObjHeight"]
    surfObjRadius = params["surfObjRadius"]
    saveFileName = params["saveFileName"]
    loadNetwork = params["loadNetwork"]
    loadDatabase = params["loadDatabase"]
    showSteps = params["showSteps"]

    # INITIALIZATION =================================================================================

    # seed the random number generators
    seed(randomSeed)
    tensorflow.random.set_seed(randomSeed)

    # build the environment first, then the agent
    rlEnv = RlEnvironmentPegsOnDisks(params)
    rlAgent = RlAgent(params)

    # optionally resume from previously saved agent state
    if loadNetwork:
        rlAgent.LoadQFunction()
    if loadDatabase:
        rlAgent.LoadExperienceDatabase()

    # RUN TEST =======================================================================================

    # per-episode logs (epsilonLog is per time step)
    returnLog = []
    durationLog = []
    epsilonLog = []
    databaseSizeLog = []
    lossLog = []

    for episode in xrange(nEpisodes):

        tic = time()

        # set up the scene: hand to holding pose, then the two cylinder sets
        rlEnv.MoveHandToHoldingPose()
        rlEnv.PlaceCylinders(nObjects, objHeight, objRadius, False)
        rlEnv.PlaceCylinders(
            nSurfaceObjects, surfObjHeight, surfObjRadius, True)
        if showSteps:
            raw_input("Placed objects.")

        episodeTotal = 0
        holdingDesc = None
        obsSeq = []
        actSeq = []
        rewardSeq = []

        for t in xrange(tMax):

            # choose the next action and apply it to the environment
            obs, act, overtDesc, epsilon = rlAgent.SenseAndAct(
                holdingDesc, t, rlEnv, episode >= unbiasOnEpisode)
            holdingDesc, reward = rlEnv.Transition(overtDesc)

            # remember the step; the whole trajectory is stored after the loop
            obsSeq.append(obs)
            actSeq.append(act)
            rewardSeq.append(reward)
            epsilonLog.append(epsilon)
            episodeTotal += reward

        # hand the complete trajectory to the agent in one call
        rlAgent.AddExperienceMonteCarlo(obsSeq, actSeq, rewardSeq)

        # cleanup episode
        rlEnv.ResetEpisode()
        print("Episode {} had return {}".format(episode, episodeTotal))

        # train on the last episode of every block of trainEvery episodes
        isTrainEpisode = episode % trainEvery == trainEvery - 1
        if isTrainEpisode:
            lossLog.append(rlAgent.UpdateQFunctionMonteCarlo())
            rlAgent.SaveQFunction()

        # record this episode
        returnLog.append(episodeTotal)
        durationLog.append(time() - tic)
        databaseSizeLog.append(rlAgent.GetNumberOfExperiences())

        # write results after every training episode and after the last one
        isLastEpisode = episode == nEpisodes - 1
        if isTrainEpisode or isLastEpisode:
            saveData = dict(
                episodeReturn=returnLog,
                episodeTime=durationLog,
                timeStepEpsilon=epsilonLog,
                databaseSize=databaseSizeLog,
                losses=lossLog)
            saveData.update(params)
            savemat(saveFileName, saveData)

        # backup agent data once the run is complete
        if isLastEpisode:
            rlAgent.SaveExperienceDatabase()