Python target示例，target.target Python示例

示例#1

0

显示文件

def performance(inputtxt):
    """Run the solver on six generated targets and print result and timing.

    For each ``i`` in ``0..5`` a target file is generated with
    ``target(21 + i, ...)``, ``sol`` is run against it, and the integer
    written to the solution file is read back.  The wall-clock time of each
    run (target generation plus solving) is recorded and printed afterwards.

    :param inputtxt: passed through unchanged as the first argument of ``sol``.
    :raises ValueError: if the solution file does not contain an integer.
    """
    from target import target  # target generation module
    from sol import sol  # solution module

    import time

    target_path = '../data/target.txt'
    solution_path = '../data/solution.txt'
    runs = 6

    s = [0] * runs  # solution value per run
    s_time = [0] * runs  # elapsed seconds per run

    for i in range(runs):
        sol_start = time.time()
        target(21 + i, target_path)
        sol(inputtxt, target_path, solution_path)
        sol_end = time.time()

        # Context manager guarantees the handle is closed even if int() fails.
        with open(solution_path, 'r') as f3:
            s[i] = int(f3.read().strip('\n'))

        # end - start: the original subtracted the other way round and
        # therefore always reported a negative duration.
        s_time[i] = sol_end - sol_start

    for i in range(runs):
        print('\nSolution' + str(i) + ' is ' + str(s[i]))
        print('Time taken for S' + str(i) + ' is ' + str(s_time[i]) +
              ' seconds')

示例#2

0

显示文件

文件： imagescheduler.py 项目： albertw/AutoSkyX

 def _savetargetHandler(self, *args):
     ''' Store the target described by the entry fields in the tree view.

     When the name, exposure and exposure-count fields are all non-empty,
     any existing row with the same name is removed and a fresh row is
     inserted at that row's former position (or at the end).  A target
     already present in ``self.neoobj.neocplist`` is updated in place;
     otherwise a new "fixed" target is created and appended to that list.
     With incomplete fields an "Invalid Data Supplied" dialog is shown.
     The entry fields are cleared in every case.
     '''
     if not (self.tname.get() and self.texposure.get() and self.tnumexp.get()):
         tkMessageBox.showinfo(message="Invalid Data Supplied")
         self._clear()
         return

     # Remove the existing row rather than editing it, remembering where it
     # was so the replacement row keeps the same position.
     position = 'end'
     for row in self.ttree.get_children():
         if self.ttree.item(row)['values'][0] != self.tname.get():
             continue
         position = self.ttree.index(row)
         self.ttree.delete(row)

     try:
         known = [cand for cand in self.neoobj.neocplist
                  if cand.tmpdesig == self.tname.get()][0]
         known.exposure = self.texposure.get()
         known.nexposures = self.tnumexp.get()
         self.ttree.insert('', position, values=known.imglist())
     except IndexError:
         # No target with that name is on the list yet: create one.
         fresh = target.target(self.tname.get(), ttype="fixed",
                               nexposures=self.tnumexp.get(),
                               exposure=self.texposure.get())
         self.neoobj.neocplist.append(fresh)
         self.ttree.insert('', position, values=fresh.imglist())

     self._clear()

示例#3

0

显示文件

 def block_check(block, DB):
     """Return True when ``block`` passes every validity check against ``DB``.

     Checks, in order: ``block`` is a dict without an ``'error'`` key; its
     ``'length'`` is an int equal to ``DB['length'] + 1``; it carries a
     ``'target'``; ``'diffLength'`` equals the running sum from ``DB``;
     ``'prevHash'`` matches the hash of the previous block; the proof-of-work
     hash does not exceed ``'target'``; ``'target'`` equals the expected
     target; ``'time'`` lies between the median of recent block times and
     six minutes from now; and the transactions validate.

     :param block: candidate block (expected to be a dict).
     :param DB: chain state; ``DB['length']`` and ``DB['diffLength']`` are read.
     :return: True if the block is acceptable, False otherwise.
     """
     def log_(txt):
         pass  # logging disabled; was: return tools.log(txt)

     def tx_check(txs):
         """Return True when the transaction list is INVALID, else False."""
         start = copy.deepcopy(txs)
         out = []
         start_copy = []
         while start != start_copy:
             if start == []:
                 return False  # Every transaction was accepted
             start_copy = copy.deepcopy(start)
             if transactions.tx_check[start[-1]['type']](start[-1], out, [''], DB):
                 out.append(start.pop())
             else:
                 return True  # Block is invalid
         return True  # Block is invalid

     if not isinstance(block, dict):
         return False
     if 'error' in block:
         return False
     if not tools.E_check(block, 'length', [int]):
         log_('no length')
         return False
     length = DB['length']
     if int(block['length']) != int(length) + 1:
         log_('wrong longth')
         return False
     # This guard must run before the first read of block['target'] below.
     # It originally sat after the diffLength check, where a missing key
     # had already raised KeyError.
     if u'target' not in block:
         log_('target error')
         return False
     if block['diffLength'] != hexSum(DB['diffLength'],
                                      hexInvert(block['target'])):
         log_('diflength error')
         return False
     if length >= 0:
         if tools.det_hash(tools.db_get(length, DB)) != block['prevHash']:
             log_('det hash error')
             return False
     # The proof-of-work hash covers the block without its nonce.
     a = copy.deepcopy(block)
     a.pop('nonce')
     half_way = {u'nonce': block['nonce'], u'halfHash': tools.det_hash(a)}
     if tools.det_hash(half_way) > block['target']:
         log_('det hash error 2')
         return False
     if block['target'] != target.target(DB, block['length']):
         log_('wrong target')
         return False
     earliest = median(recent_blockthings('times', DB, custom.mmm))
     if 'time' not in block:
         log_('no time')
         return False
     if block['time'] > time.time()+60*6:
         log_('too late')
         return False
     if block['time'] < earliest:
         log_('too early')
         return False
     if tx_check(block['txs']):
         log_('tx check')
         return False
     return True

示例#4

0

显示文件

文件： _function_.py 项目： thebitdragon/OTSU

def ufit_function(x):
    """Return the negated result of ``target(im_change, x)``.

    :param x: the Otsu threshold to evaluate
    :return: -1 times the between-class variance for that threshold
    """
    # Negated, presumably so a minimiser can be used to maximise the variance.
    return -1 * target(im_change, x)

示例#5

0

显示文件

def genesis(pubkey, DB):
    """Build the first block of the chain.

    The block has length 0, the default target from ``target.target()``, a
    ``diffLength`` derived from that target, and a single mint transaction
    for ``pubkey``.  It is returned after a package/unpackage round trip.
    """
    initial_target = target.target()
    block = {
        'version': custom.version,
        'length': 0,
        'time': time.time(),
        'target': initial_target,
        'diffLength': blockchain.hexInvert(initial_target),
        'txs': [make_mint(pubkey, DB)],
    }
    packaged = tools.package(block)
    return tools.unpackage(packaged)

示例#6

0

显示文件

def new_game(event=''):
    """Run one round of the game (the game itself).

    Two targets are created and the mouse is bound to the gun.  The loop
    runs until both targets are destroyed and no balls are left in flight;
    then the result text is cleared and the next round is scheduled.

    :param event: unused; allows the function to be used as an event callback.
    """
    target1 = target.target(canv)
    target2 = target.target(canv)
    bullet = []
    balls = []
    canv.bind('<Button-1>', gun1.fire2_start)
    canv.bind('<ButtonRelease-1>', lambda event: gun1.fire2_end(event, balls, bullet))
    canv.bind('<Motion>', lambda event: gun1.targetting(canv, event))
    target1.live = 1
    target2.live = 1
    sleeptime = 0.03  # frame delay in seconds
    while target1.live or balls or target2.live:
        # Iterate over a snapshot: expired balls are removed from ``balls``
        # inside the loop, and removing from the list being iterated makes
        # the iterator skip the following ball for that frame.
        for ball in list(balls):
            ball.move(canv)
            ball.live -= 1
            if ball.live <= 0:
                canv.delete(ball.id)
                balls.remove(ball)
            if ball.hittest(canv, text_scope, target1) and target1.live:
                target1.live = 0
                canv.delete(target1.id)
            if ball.hittest(canv, text_scope, target2) and target2.live:
                target2.live = 0
                canv.delete(target2.id)
            if not target2.live and not target1.live:
                # Both targets are down: stop accepting shots, show the score.
                canv.bind('<Button-1>', '')
                canv.bind('<ButtonRelease-1>', '')
                canv.itemconfig(result_screen, text='Вы уничтожили цели за ' + str(len(bullet)) + ' выстрелов')
        target1.move(canv)
        target2.move(canv)
        canv.update()
        time.sleep(sleeptime)
        gun1.targetting(canv)
        gun1.power_up(canv)
    canv.itemconfig(result_screen, text='')
    # NOTE(review): the rest of the function uses ``gun1``; confirm that
    # ``gun.gun`` (rather than an attribute of ``gun1``) is intended here.
    canv.delete(gun.gun)
    root.after(750, new_game)

示例#7

0

显示文件

def make_block(prev_block, txs, pubkey, DB):
    """Assemble the block that follows ``prev_block``.

    The new block is one longer than ``prev_block``, uses the target for
    that length, accumulates ``diffLength`` from the previous block, carries
    ``txs`` plus a mint transaction for ``pubkey``, and links back through
    the hash of ``prev_block``.  It is returned after a package/unpackage
    round trip.
    """
    next_length = int(prev_block['length']) + 1
    next_target = target.target(next_length)
    inverted = blockchain.hexInvert(next_target)
    total_diff = blockchain.hexSum(prev_block['diffLength'], inverted)
    block = {
        'version': custom.version,
        'txs': txs + [make_mint(pubkey, DB)],
        'length': next_length,
        'time': time.time(),
        'diffLength': total_diff,
        'target': next_target,
        'prevHash': tools.det_hash(prev_block),
    }
    packaged = tools.package(block)
    return tools.unpackage(packaged)

示例#8

0

显示文件

文件： MPCweb.py 项目： albertw/AutoSkyX

 def get_neocp(self):
     ''' Get the NEOCP data.

     Fetches ``self.neocp`` and parses each fixed-width line into a target
     object: the first field (stripped) names the target and the remaining
     ten fields are passed to ``addneoprops``.  Lines that do not match the
     expected layout are skipped.

     Returns the list of parsed targets.
     '''
     data = urllib2.urlopen(self.neocp)
     regex = re.compile("^(.{7}) (.{3}) (.{12}) (.{8}) (.{8}) (.{4})" +
                        " (.{22}) (.{7}) (.{3})  (.{5}) (.{4})")
     my_neos = []
     try:
         for line in data:
             res = regex.match(line)
             if res is None:
                 # Blank, truncated or otherwise malformed line: match()
                 # returns None and calling .group() on it would raise.
                 continue
             fields = res.groups()
             my_neo = target.target(fields[0].strip())
             my_neo.addneoprops(*fields[1:])
             my_neos.append(my_neo)
     finally:
         # Release the connection even if parsing fails.
         data.close()
     return my_neos

示例#9

0

显示文件

def Tetris (targetGrid, piecesLimit):
    """Search for a solution of ``targetGrid`` on a 4x4 piece grid.

    A target and a piece bin are built, then ``findSolutions`` is run for
    shift 0 and, after re-slicing the target by one each time, for shifts
    1, 2 and 3.  The target's ``targetSolution`` is returned.
    """
    gridSize = 4

    # Target for a gridSize x gridSize grid, and the bin of available pieces
    challangeTarget = target(targetGrid, gridSize, testMode)
    pieces = pieceBin(piecesLimit, gridSize, testMode)

    # Unshifted pass
    challangeTarget.bashBits()
    findSolutions(challangeTarget, pieces, gridSize, 0)

    # Slice the target by one and rerun for each further shift
    for shift in (1, 2, 3):
        challangeTarget.sliceTarget(1)
        challangeTarget.bashBits()
        findSolutions(challangeTarget, pieces, gridSize, shift)

    # Only one candidate is recorded, so argmin always selects it; the
    # lists leave room for further shifts to be added.
    totals = [sum(pieces.limitArray.values())]
    candidates = [challangeTarget.targetSolution]

    best = np.argmin(totals)
    challangeTarget.targetSolution = candidates[best]
    return challangeTarget.targetSolution

示例#10

0

显示文件

文件： MPCweb.py 项目： albertw/AutoSkyX

 def get_crits(self):
     ''' Get the Critical List data.

     Fetches ``self.crits`` and parses each ``|``-separated fixed-width
     line into a target of type "mp": the first field (stripped) names the
     target and fields 2-7 and 9-11 are passed to ``addcritprops``.  Lines
     that do not match the expected layout are logged and skipped.

     Returns the list of parsed targets.
     '''
     data = urllib2.urlopen(self.crits)
     # Raw string so that ``\|`` reaches the regex engine without relying on
     # Python leaving an unknown escape sequence untouched.
     regex = re.compile(r"^(.{21})\|(.{14})\|(.{10})\|(.{8})\|(.{8})\|(.{9})\|(.{9})\|(.{5})\|(.{10})\|(.{5})\|(.{5})")
     crits = []
     try:
         for line in data:
             res = regex.match(line)
             logger.debug(line)
             if res is None:
                 # Header or malformed line: match() returns None and
                 # calling .group() on it would raise.
                 continue
             logger.debug(res.group(2))
             crit = target.target(res.group(1).strip(), ttype="mp")
             # NOTE(review): group 8 is not forwarded - confirm intentional.
             props = res.group(2, 3, 4, 5, 6, 7, 9, 10, 11)
             logger.debug(" ".join(props))
             crit.addcritprops(*props)
             crits.append(crit)
     finally:
         # Release the connection even if parsing fails.
         data.close()
     return crits

示例#11

0

显示文件

文件： multiple_tracker_discrete_reward_centralized_linear.py 项目： gorjida/MultiSensorMutiTargetRL

def run(args):
    """Train a sensor-movement policy over simulated multi-target tracking episodes.

    ``args`` is an indexable sequence read as:
    ``args[0]`` method (0 linear, 1 RBF, 2 MLP policy), ``args[1]`` number of
    RBF components, ``args[2]`` number of MLP neurons, ``args[3]`` process
    index (also seeds NumPy with ``process_index + 100``), ``args[4]`` result
    folder name, ``args[5]`` velocity variance, ``args[6]`` number of targets.

    Each episode creates targets and one EKF tracker per target, moves the
    sensor according to the current policy, and accumulates a 0/1 reward
    based on the windowed average of the trackers' normalised uncertainty.
    After each accepted episode the policy weights are adjusted from the
    discounted returns and written to the weight file; every 100 episodes
    trimmed statistics are appended to the reward/error/variance files.

    NOTE(review): many names used below (``output_size``, ``num_states``,
    ``N_max``, ``sigma_max``, ``gamma``, ``window_size``, ``window_lag``,
    ``episode_length``, ``learning_rate``, ``base_path``, ``MAX_UNCERTAINTY``,
    ``num_episodes``, ...) are not defined in this function and are presumably
    module-level globals - confirm against the rest of the file.
    """
#if __name__=="__main__":
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy

    #args = [0,20,50,0,"TEST1",.001,10]

    method = args[0]
    RBF_components = args[1]
    MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    #process_index = args[4]
    np.random.seed(process_index + 100)
    vel_var = args[5]
    num_targets = args[6]




    #method = 0
    #RBF_components = 20
    #MLP_neurons = 50
    #vel_var = .001
    #num_targets = min(6,max(2,np.random.poisson(3)))
    #num_targets = 2

    print("Starting Thread:" + str(process_index))

    #Initialize all the parameters (input && output-layers)
    params ={0:{},1:{},2:{}}
    if method==0:
        params[0]["weight"] = np.random.normal(0, .3, [2, output_size]) #Output-layer (maps flattened states to the actions)
        #params[0]["weight"] = []
        #for f in range(0,filter_size):
         #   params[0]["weight"].append(np.random.normal(0,1,[spatial_weight_size,temporal_weight_size])) #Convolution weith matrix

        #params[0]["weight"] = np.array([[ 1.45702249, -1.17664153, -0.11593174,  1.02967173, -0.25321044,
         #0.09052774],
       #[ 0.67730786,  0.3213561 ,  0.99580938, -2.39007038, -1.16340594,
        #-1.77515938]])
    elif method==1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method==2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0,1,[MLP_neurons,1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    #for episode_counter in range(0,N_max):
    #Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True


    # All result files are opened in append mode and named after the
    # velocity variance and the process index.
    result_folder = base_path+folder_name+"/"
    reward_file = open(result_folder+"reward_noise:"+str(vel_var)+"_"+str(process_index)+  "_linear_6states.txt","a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                      "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")

    #flatten initial weight and store the values
    # NOTE(review): ``weight`` is only bound for method 0 and 1, but it is
    # read later (np.shape(weight)) regardless of method.
    if method==0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp)+"\n")
    elif method==1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method==2:
        pass

    #weight = np.reshape(np.array(weights[0]), [2, 6])
    init_max_target = 3
    # Overrides the value read from args[6] above.
    num_targets = 3
    while episode_counter<N_max:
        if episode_counter%1000==0 and episode_counter>0:
            init_max_target +=1
            init_max_target = min(10,init_max_target)

        if episode_counter%100==0 and episode_counter>0:
            num_targets = np.random.randint(3,init_max_target+1)
        # The random draw above is immediately overwritten: every episode uses 3 targets.
        num_targets = 3
        sigma = gen_learning_rate(episode_counter,sigma_max,.1,5000)
        # The scheduled value above is immediately overwritten by sigma_max.
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episodes_counter)
        scen = scenario(1,1)
        bearing_var = 1E-2#variance of bearing measurement
        #Target information
        x = 10000*np.random.random([num_targets])-5000#initial x-location
        y = 10000 * np.random.random([num_targets]) - 5000#initial y-location
        xdot = 10*np.random.random([num_targets])-5#initial xdot-value
        ydot = 10 * np.random.random([num_targets]) - 5#initial ydot-value

        init_target_state = []
        init_for_smc = []
        for target_counter in range(0,num_targets):
            init_target_state.append([x[target_counter],y[target_counter],xdot[target_counter],ydot[target_counter]])#initialize target state
            init_for_smc.append([x[target_counter]+np.random.normal(0,5),y[target_counter]
                                 +np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)])#init state for the tracker (tracker doesn't know about the initial state)


        #temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        #init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        #init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]
        #init_velocity_estimate = [init_target_state[2],init_target_state[3]]
        #init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY])#initial covariance of state estimation
        t = []
        for i in range(0,num_targets):
            t.append(target(init_target_state[i][0:2], init_target_state[i][2],
                            init_target_state[i][3], vel_var, vel_var, "CONS_V"))#constant-velocity model for target motion
        # The motion model and variances of the first target are reused for every tracker.
        A, B = t[0].constant_velocity(1E-10)#Get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var

        tracker_object = []
        for i in range(0,num_targets):
            tracker_object.append(EKF_tracker(init_for_smc[i], np.array(init_covariance), A,B,x_var,y_var,bearing_var))#create tracker object
            #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method==0:
            s = sensor("POLICY_COMM_LINEAR")#create sensor object (stochastic policy)
        elif method==1:
            s = sensor("POLICY_COMM_RBF")
        elif method==2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)#create measurement object

        # Per-target history lists, indexed [target][time step]
        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = [];
        y_truth = [];
        x_vel_truth = [];
        y_vel_truth = []
        uncertainty = []
        avg_uncertainty = []
        max_uncertainty = []

        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0,num_targets):
            x_truth.append([])
            y_truth.append([])
            x_vel_truth.append([])
            y_vel_truth.append([])
            uncertainty.append([])
            vel_error.append([])
            x_est.append([])
            y_est.append([])
            x_vel_est.append([])
            y_vel_est.append([])
            pos_error.append([])
            innovation.append([])
        reward = []
        episode_condition = True
        n=0
        violation = 0
        #store required information
        episode_state = []
        episode_state_out_layer = []
        episode_grad_with_state_w1 = []
        episode_grad_with_state_w2 = []

        episode_MLP_state = []
        episode_actions = []


        # One pass per time step; left via the ``break`` once n exceeds episode_length.
        while episode_condition:
            temp_m = []
            for i in range(0,num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location,s.current_location))

            m.append(temp_m)
            temp_reward = []
            target_actions = []

            #create input-feature matrix
            input_state = np.zeros([num_states,num_targets]) #create a fixed-size matrix for input states


            for i in range(0,num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1])/tracker_object[i].innovation_var[-1]
                #print(normalized_innovation)
                #if (normalized_innovation<1E-4 or n<10) and n<200:
                    #end of episode
                # Tracker state estimate followed by the sensor location
                current_state = list(tracker_object[i].x_k_k.reshape(len(tracker_object[i].x_k_k))) + list(s.current_location)

                #print(current_state)
                #state normalization
                x_slope = 2.0/(scen.x_max-scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)

                # Computed but not used below.
                x_slope_sensor = 2.0 / (40000)
                y_slope_sensor = 2.0 / (40000)

                vel_slope = 2.0/(scen.vel_max-scen.vel_min)
                #normalization
                current_state[0] = -1+x_slope*(current_state[0]-scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] -scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)

                # For method 1 the column is left at zero.
                if method==0 or method==2:input_state[:,i] = current_state


                #target_actions.append(s.generate_action(params,input_state,.1))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state) ####Neeed to get modified
                # NOTE(review): ``extra_information`` is not assigned anywhere in this
                # function; unless it is a module-level name this raises NameError for method 2.
                if method==2: episode_MLP_state.append(extra_information) #need to get modified
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(estimate[2:4]-np.array([t[i].current_velocity[0],t[i].current_velocity[1]]).reshape(2,1)))
                pos_error[i].append(np.linalg.norm(estimate[0:2]-np.array(truth).reshape(2,1)))
                innovation[i].append(normalized_innovation[0])
                # Trace of the tracker covariance, scaled by MAX_UNCERTAINTY below
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                #if unormalized_uncertainty>MAX_UNCERTAINTY:
                #   normalized_uncertainty = 1
                #else:
                #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)

            #Featurize input-state using pooling
            # max, min, mean and median across targets, concatenated into one flat list
            input_state = list(np.max(input_state,axis=1))+list(np.min(input_state,axis=1))+\
                          list(np.mean(input_state, axis=1))+list(np.median(input_state,axis=1))

            this_uncertainty = []
            [this_uncertainty.append(uncertainty[x][-1]) for x in range(0,num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            # Reward is 1 when the recent average uncertainty dropped below the
            # lagged window's average (or the latest value is below .1), else 0.
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    # if current_avg < prev_avg:
                    reward.append(1)
                else:
                    reward.append(0)

            #voting
            #if np.mean(temp_reward)>.5:
              #  reward.append(np.mean(temp_reward))
            #else:
             #   reward.append(np.mean(temp_reward))

            #if sum(reward)>1100 and num_targets>2: sys.exit(1)

            #Do something on target_actions
            #Create feature-vector from generated target actions

            s.update_location_new(params,np.array(input_state).reshape([len(input_state),1]),sigma)

            #Output created by the CNN
            episode_state_out_layer.append(input_state)


            #reward.append(-1*uncertainty[-1])
            #update return
            # Every earlier step's return receives the new reward scaled by its
            # (gamma-decayed) discount; the new step starts with discount 1.
            discount_vector = gamma*np.array(discount_vector)
            discounted_return+= (1.0*reward[-1])*discount_vector
            new_return = 1.0*reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)

            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n>episode_length: break
            n+=1

        #Based on the return from the episode, update parameters of the policy model
        #Normalize returns by the length of episode
        #if episode_counter%10==0 and episode_counter>0: print(weight_saver[-1])


        # dict(params) is a shallow copy: the inner dicts and arrays are shared with params.
        prev_params = dict(params)
        condition = True
        for i in range(0,num_targets):
            if np.mean(pos_error[i])>10000:
                condition = False
                break
                # NOTE(review): the two statements below are unreachable (they follow ``break``).
                episode_condition = False
                episode_counter-=1

        # Discard episodes in which any target's mean position error exceeded 10000.
        if not condition:
            #print("OOPSSSS...")
            continue
        condition = True

        prev_params = dict(params)
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            #init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter,learning_rate,1E-8,10000)
            internal_rate = gen_learning_rate(episode_counter, 5*1E-5, 1E-9, 10000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0,len(episode_actions)):
                #calculate gradiant
                #state = np.array(episode_state[e]).reshape(len(episode_state[e]),1)
                out_state = np.array(episode_state_out_layer[e]).reshape(len(episode_state_out_layer[e]),1)

                #calculate gradient
                if method==0:
                    predicted_action = params[0]['weight'].dot(out_state)
                    #gradiant = ((episode_actions[e].reshape(2,1)-params[0]['weight'].dot(state)).dot(state.transpose()))/sigma**2#This is the gradiant
                    gradiant_out_layer = ((episode_actions[e].reshape(2, 1) - predicted_action).dot(
                        out_state.transpose())) / sigma ** 2  # This is the gradiant

                elif method==1:
                    # NOTE(review): ``state`` is not assigned in this function (its
                    # assignment above is commented out).
                    gradiant = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2  # This is the gradiant
                elif method==2:
                    #Gradient for MLP
                    pass

                # NOTE(review): ``gradiant_out_layer`` is only assigned in the method 0 branch.
                if np.max(np.abs(gradiant_out_layer))>1E2:# or np.max(np.abs(gradiant_internal[0]))>1E2:
                    #print("OOPPSSSS...")
                    continue #clip large gradients

                if method==0:
                    adjustment_term_out_layer = gradiant_out_layer*normalized_discounted_return[e]#an unbiased sample of return

                    params[0]['weight'] += rate * adjustment_term_out_layer
                elif method==1:
                    adjustment_term = gradiant * normalized_discounted_return[e]  # an unbiased sample of return
                    params[1]['weight'] += rate * adjustment_term
                elif method==2:
                    #Gradient for MLP
                    pass

            #if not condition:
             #   weight = prev_weight
              #  continue

            episode_counter+=1
            # NOTE(review): always reads params[0]['weight'], which is only
            # populated when method == 0.
            flatted_weights = list(params[0]['weight'][0,:]) + list(params[0]['weight'][1,:])
            temp = []
            [temp.append(str(x)) for x in flatted_weights]
            weight_file.write("\t".join(temp)+"\n")
            #weight_saver1.append(params[0]['weight'][0][0][0])
            #weight_saver2.append(params[0]['weight'][0][1][0])
        else:
            #print("garbage trajectory: no-update")
            pass


        #if not training:
        return_saver.append(sum(reward))

        error_saver.append(np.mean(pos_error))

        #print(len(return_saver),n)
        # Every 100 episodes: write statistics over the lowest 95% of the saved
        # values, then close and reopen the files and reset the accumulators.
        if episode_counter%100 == 0 and episode_counter>0:
            # if episode_counter%100==0 and episode_counter>0:
            print(episode_counter, np.mean(return_saver), sigma)
            #print(params[method]['weight'])
            #weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            #print(weight)
            reward_file.write(str(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver)[0:int(.95*len(return_saver))]))+"\n")
            #weight_file.write(str(np.mean(return_saver)) + "\n")

            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()

            reward_file = open(
                result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(
                result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(
                result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(
                result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")
            weight_file = open(
                result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(
                result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")

            return_saver = []
            error_saver = []
        # NOTE(review): ``num_episodes`` is not defined in this function; presumably module-level.
        num_episodes.append(n)

示例#12

0

显示文件

文件： scrape_old.py 项目： ConnorGersbach/DogCrawl

def _fetch_until_results(fetch, item, empty_message, master, echo_first=False):
	"""Call ``fetch(item)`` until it yields a non-empty result; extend ``master``.

	A result whose first element is the string "Empty" means the site had no
	hits: ``empty_message`` is printed and nothing is added to ``master``.
	Errors raised while fetching are ignored and the call is retried.
	"""
	results = []
	while len(results) == 0:
		try:
			results = fetch(item)
			if echo_first:
				print(results[0])
			if results[0] == "Empty":
				print(empty_message)
			else:
				master.extend(results)
		except Exception:
			# Deliberate best-effort retry for flaky scrapers.  Catching
			# Exception instead of a bare ``except`` lets Ctrl-C and
			# SystemExit break out of the otherwise unbounded loop.
			pass
	return results


def scrape(arg):
	"""Search the sites named in ``arg`` for an item and print the results.

	``arg`` is a sequence containing site names ("amazon", "bestbuy",
	"staples", "macys", "walmart", "homedepot", "target"); ``arg[1]`` must
	ALWAYS be the item to search for.  The first three fields of every
	result are printed, one per line.
	"""
	master = []  # Master stores ALL RESULTS

	# Amazon does NOT use an API and might fail occasionally, hence the retry.
	if "amazon" in arg:
		_fetch_until_results(amazon.amazon, arg[1],
			"amazon returned no results", master)

	# Bestbuy DOES use an API, which is faster and more reliable, but needs
	# an API key from the website.
	if "bestbuy" in arg:
		master += bestbuy.bestbuy(arg[1])

	# ADD MORE FUNCTION CALLS TO OTHER SITES HERE!!!!!!!
	#if "https://wherever.net" in arg:
		#wherever(arg[1])

	# No issues seen with staples so far, but retry anyway.
	if "staples" in arg:
		_fetch_until_results(staples.staples, arg[1],
			"Staples returned no results", master, echo_first=True)

	if "macys" in arg:
		_fetch_until_results(macys.macys, arg[1],
			"Macy's returned no results", master)

	# Walmart and Home Depot results used to be appended a second time after
	# the retry loop, duplicating every hit and adding the "Empty" sentinel
	# to the output.  They are now handled like the other scraped sites.
	if "walmart" in arg:
		_fetch_until_results(walmart.walmart, arg[1],
			"Walmart returned no results", master)

	if "homedepot" in arg:
		_fetch_until_results(homedepot.homedepot, arg[1],
			"homedepot returned no results", master)

	if "target" in arg:
		master += target.target(arg[1])

	# print json.dumps(master)
	for entry in master:
		print(entry[0])
		print(entry[1])
		print(entry[2])

示例#13

0

显示文件

def guessMeRandom(path, choice, threshLevel, visual=0):
    """
		Pick up randomly a strip in files, extract and show information about it. After an input from the user, the origin of the strip is show.
		
		Warning: use ResultPreparation folder, and we erase the content of them at the beginning
		
		Parameters
		----------
		*path:Path where all folders are
		*choice:List of name's folder of strips
		*treshLevel: threshold to apply
		*visual:enable to open windows of results or not
		
		"""

    #Clear
    preparation.clearResultFolder(path + '/ResultPreparation')

    #Random choose
    chosenFile = choice[np.random.randint(len(choice))]
    imageList = os.listdir(path + '/CatchThemAll/' + chosenFile)
    chosenImage = imageList[np.random.randint(len(imageList))]
    preparation.shape(path + '/CatchThemAll/' + chosenFile, chosenImage,
                      path + '/ResultPreparation', 0, threshLevel)

    #Show global result at differents threshold
    i = 60
    l = []
    while i <= 140:
        print 'Target with threshold at ', i, '/255'
        tar = target.target(path + '/FFT/', i, 0)
        l.append(tar[0][1])
        l.append(tar[1][1])
        i += 20

    #Pick up the best frequency
    #Count differents frequencies from different levels
    compte = dict([(k, l.count(k)) for k in set(l)])
    print 'Compte: ', compte
    #Show the most commom frequency
    maxFreq = target.maxDict(compte)
    print 'maxFreq: ', maxFreq

    filelist = os.listdir(path + '/ResultPreparation')
    #List where a file and his results are associated
    areaResult = []
    #List for the base and the top of the bumps
    peakFloorRoof = []
    #List of most clearer frequencies of each strips
    freqListTop = []
    #List of all frequencies found
    globalFreq = []
    for fi in filelist:
        im = cv2.imread(path + '/ResultPreparation/' + fi)

        imag = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        #pick up the height of bumps
        floor, roof = peak.heightFinder(imag)
        print 'Base and top of bumps: ', floor, roof
        peakFloorRoof.append((floor, roof))

        if visual == 1:
            #Draw to line at the base and top of bumps
            im[floor] = (255, 100, 100)
            im[roof] = (100, 255, 100)
            cv2.namedWindow('ImFR', cv2.WINDOW_NORMAL)
            cv2.imshow('ImFR', im)
            k = cv2.waitKey(0) & 0xFF
            if k == 27:
                cv2.destroyAllWindows()

        #Extract informations from strips
        result = target.acquireTarget(imag, threshLevel, visual)
        #Add the informations to the differents list
        freqList.append(result[1])
        areaResult.append((result, fi))
        globalFreq = globalFreq + result[3]

    cpt = dict([(k, freqList.count(k)) for k in set(freqList)])
    cptGlobalFreq = dict([
        (k, (globalFreq.count(k),
             100 * globalFreq.count(k) / float(len(globalFreq))))
        for k in set(globalFreq)
    ])

    #Display information
    print '---------------------------- Heigth of shredder\'s theeth ----------------------------'
    print 'Average value of Floor/Roof: ', np.average(np.array(peakFloorRoof),
                                                      axis=0)
    print '------------------------------ Shredder\'s frequencies -------------------------------'
    print 'Total: ', cpt
    print 'Global freq of areas', cptGlobalFreq
    print '------------------------------- Strip\'s informations --------------------------------'
    print 'Width strip: ', getWidthStripSpecified(path + '/CatchThemAll/' +
                                                  chosenFile + '/' +
                                                  chosenImage)
    print '--------------------------------------------------------------------------------------'

    #Wait an correct input before giving the answer
    waiting = ""
    while waiting != "OK":
        waiting = raw_input(
            "Write OK when you are ready to see the solution: ")
    print 'Answer: ', chosenImage

示例#14

0

显示文件

def run(path, fileName, step, threshLevel=170, visual=0):
    """"
	Use it for extracting informations about strips from a same shredder.
	During the process it's possible to get the following informations:
	-The width of the strip
	-Information about teeth of the shredder
	-Results of fast Fourier transformation (fft)
	
	Parameters
	----------
	*path: path where are all needed folders
	*fileName: path or name of the folder where to pick full images
	*step: 
		0-Catch and preparation phase. Extract and creta all images of strips
		1-Align phase. Don't create bases images
		2-Information phase. Don't create any file during this phase.
	*threshLevel: Threshold to apply to images 
	*visual: Enable or not to open windows of results
	
	"""

    #Step 0
    #Extracting strip from the source image
    #and prepare them to be analyse
    if step < 1:
        print 'Catching and preparation phase'
        #Clearing result's folder
        preparation.clearResultFolder(path + '/ResultPreparation')

        pathFile = path + '/' + fileName
        filelist = os.listdir(pathFile)
        #For each images extract strip and preapare them
        for i in filelist:
            preparation.clearResultFolder(path + '/ResultCatcher')
            catcher.createSet(path, fileName, i)
            preparation.preparation(path + '/ResultCatcher',
                                    path + '/ResultPreparation', visual, 0,
                                    threshLevel)

    #Step 1
    #Create an image of strips align with the best superposition possible
    if step < 2:
        print 'Align phase'
        align.preparation(path)
    #Step 2
    #Find a small area with clear peaks
    if step < 3:
        print 'Information phase'
        i = 60
        l = []
        #Try different level of threshold for the align images
        while i <= 140:
            print 'Target with threshold at ', i, '/255'
            tar = target.target(path + '/FFT/', i, visual)
            l.append(tar[0][1])
            l.append(tar[1][1])
            i += 20

        #Pick up the best frequency
        #Count differents frequencies from different levels
        compte = dict([(k, l.count(k)) for k in set(l)])
        print 'Compte: ', compte
        #Show the most commom frequency
        maxFreq = target.maxDict(compte)
        print 'maxFreq: ', maxFreq

        filelist = os.listdir(path + '/ResultPreparation')
        #List where a file and his results are associated
        areaResult = []
        #List for the base and the top of the bumps
        peakFloorRoof = []
        #List of most clearer frequencies of each strips
        freqListTop = []
        #List of all frequencies found
        globalFreq = []
        for fi in filelist:

            im = cv2.imread(path + '/ResultPreparation/' + fi)

            imag = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
            #pick up the height of bumps
            floor, roof = peak.heightFinder(imag)
            print 'Base and top of bumps: ', floor, roof
            peakFloorRoof.append((floor, roof))

            if visual == 1:
                #Draw to line at the base and top of bumps
                im[floor] = (255, 100, 100)
                im[roof] = (100, 255, 100)
                cv2.namedWindow('ImFR', cv2.WINDOW_NORMAL)
                cv2.imshow('ImFR', im)
                k = cv2.waitKey(0) & 0xFF
                if k == 27:
                    cv2.destroyAllWindows()
            #Extract informations from strips
            result = target.acquireTarget(imag, threshLevel, visual)
            #Add the informations to the differents list
            freqListTop.append(result[1])
            areaResult.append((result, fi))
            globalFreq = globalFreq + result[3]
        #Sort the list
        third(areaResult)

        #Get the top 3
        topArea1 = areaResult[-1]
        topArea2 = areaResult[-2]
        topArea3 = areaResult[-3]

        #Count differents frequencies
        cpt = dict([(k, freqListTop.count(k)) for k in set(freqListTop)])

        print 'Top 3 area: ', topArea1, topArea2, topArea3
        print 'Average value of Floor/Roof: ', np.average(
            np.array(peakFloorRoof), axis=0)
        print 'Total: ', cpt
        peakIdentifier(path, topArea1)
        peakIdentifier(path, topArea2)
        peakIdentifier(path, topArea3)

        cptGlobalFreq = dict([
            (k, (globalFreq.count(k),
                 100 * globalFreq.count(k) / float(len(globalFreq))))
            for k in set(globalFreq)
        ])
        print 'Global Freq: ', cptGlobalFreq

示例#15

0

显示文件

文件： scrape.py 项目： ConnorGersbach/DogCrawl

def scrape_target(arg):
    """Add the results of ``target.target(arg)`` to the global ``master``."""
    global master
    found = target.target(arg)
    master += found

示例#16

0

显示文件

def difficulty(DB):
    """Return the result of ``target.target(DB)`` converted to a string."""
    current = target.target(DB)
    return str(current)
def my_balance(DB, address='default'):

示例#17

0

显示文件

文件： traversal.py 项目： thebitdragon/OTSU

# Convert the image to a grayscale image
im_change = im.convert('L')

# Read the image dimensions
m, n = im_change.size[0], im_change.size[1]  # size[0] and size[1] of the image
#print ("长=",m,"\t宽=",n)
# Read the pixel values
im_array = im_change.load()

# Evaluate target() for every candidate level produced by range(255)
list0 = []
for i in range(255):
    list0.append(target(im_change, i))

# Largest value found (labelled as the maximum between-class variance below)
max_fx = max(list0)
# Its position in the list is reported as the threshold
M = list0.index(max_fx)
print("最大类间方差=", max_fx, "\t阈值=", M)

# """
# 输出结果
# """
# #输出初始图像
# plt.figure('初始')
# plt.imshow(im)
#
# #输出otsu分割后的图像
# img = np.array(np.zeros([n,m],dtype=int))
# for i in range(0,n):
#     for j in range(0,m):

示例#18

0

显示文件

文件： blockchain.py 项目： decisions/Truthcoin-POW

    def block_check(block, DB):
        """Return True when ``block`` passes every validity check, else False.

        The checks run in this order and the first failure returns False:
        ``block`` is a dict without an 'error' key; tools.E_check accepts
        its 'length' and the value is a plain int equal to the stored
        length plus one; a 'target' is present; 'diffLength' equals hexSum
        of the stored diffLength and hexInvert(target); when the stored
        length is >= 0, 'prevHash' equals the hash of the stored block at
        that length; the hash of the half-way block is not greater than
        the target; the target equals target.target(length); 'time' is
        present, at most six minutes ahead of the local clock and not
        before the median of the recent block times; and no transaction in
        'txs' is rejected.

        The presence of 'target' is verified before its first use. The
        original verified it only after block['target'] had already been
        read, so a block without a target raised KeyError instead of being
        rejected.
        """
        def log_(txt):
            # Logging is switched off; to enable it: return tools.log(txt)
            pass

        def tx_check(txs):
            """Return True when a transaction is INVALID, False when all pass.

            Transactions are checked from the last one to the first; each
            accepted one is moved to ``out``, which is handed to the check
            of the following ones.
            """
            start = copy.deepcopy(txs)
            out = []
            error_msg = ['']
            while True:
                if start == []:
                    return False  # Block passes this test
                if transactions.tx_check[start[-1]['type']](start[-1], out, error_msg, DB):
                    out.append(start.pop())
                else:
                    log_('bad block: ' + str(txs))
                    log_('error message: ' + str(error_msg))
                    return True  # Block is invalid

        if not isinstance(block, dict):
            return False
        if 'error' in block:
            return False
        if not tools.E_check(block, 'length', [int]):
            log_('no length')
            return False
        length = tools.db_get('length')
        # Exact type comparison on purpose: subclasses such as bool are refused
        if type(block['length']) is not int:
            log_('wrong length type')
            return False
        if int(block['length']) != int(length) + 1:
            log_('wrong longth')
            return False
        # Must come before the diffLength test, which reads block['target']
        if 'target' not in block:
            log_('target error')
            return False
        if block['diffLength'] != hexSum(tools.db_get('diffLength'),
                                         hexInvert(block['target'])):
            log_('diflength error')
            return False
        if length >= 0:
            if tools.det_hash(tools.db_get(length, DB)) != block['prevHash']:
                log_('det hash error')
                return False
        half_way = tools.make_half_way(block)
        if tools.det_hash(half_way) > block['target']:
            log_('det hash error 2')
            return False
        expected_target = target.target(block['length'])
        if block['target'] != expected_target:
            log_('block: ' + str(block))
            log_('target: ' + str(expected_target))
            log_('wrong target')
            return False
        earliest = median(recent_blockthings('times', custom.mmm))
        if 'time' not in block:
            log_('no time')
            return False
        if block['time'] > time.time() + 60 * 6:
            log_('too late')
            return False
        if block['time'] < earliest:
            log_('too early')
            return False
        if tx_check(block['txs']):
            log_('tx check')
            return False
        return True

示例#19

0

显示文件

文件： single_target_single_sensor_train.py 项目： gorjida/MultiSensorMutiTargetRL

        #  sigma = max(.1,sigma)
        if episode_counter % 1500 == 0 and episode_counter > 0:
            sigma = sigma_max
            sigma = max(.1, sigma)
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        # print(episodes_counter)
        scen = scenario(1, 1)
        bearing_var = 1E-2  # variance of bearing measurement
        # Randomly initialize target location
        x = 20000 * random.random() - 10000  # initial x-location
        y = 20000 * random.random() - 10000  # initial y-location
        xdot = 20 * random.random() - 10  # initial xdot-value
        ydot = 20 * random.random() - 10  # initial ydot-value
        t = [target([x, y], xdot, ydot, vel_var, vel_var, "CONS_V")]
        init_sensor_state = [
            10000 * random.random() - 5000, 10000 * random.random() - 5000, 3,
            -2
        ]
        init_target_state = [x, y, xdot, ydot]  # initialize target state
        init_covariance = np.diag([
            MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY
        ])  # initial covariance of state estimation
        temp_sensor_object = sensor("POLICY_COMM_LINEAR", init_sensor_state[0],
                                    init_sensor_state[1],
                                    np.array(params[0]["weight"]))

        init_for_tracker = [
            x + np.random.normal(0, 5), y + np.random.normal(0, 5),
            np.random.normal(0, 5),

示例#20

0

显示文件

def difficulty(DB, args):
    """Return the result of ``target.target(DB)``; ``args`` is not used."""
    current = target.target(DB)
    return current

示例#21

0

显示文件

文件： gun.py 项目： VarlamovAM/python.labi

            if t2.live and b.hittest(t2):
                t2.live = 0
                t2.hit(canv)
                points.hit(canv)
            if t1.live == 0 and t2.live == 0:
                canv.itemconfig(screen1,
                                text='Вы уничтожили цели за ' + str(bullet) +
                                ' выстрелов')
                canv.bind('<Button-1>', '')
                canv.bind('<ButtonRelease-1>', '')
            b.time_live(canv, balls)
        canv.update()
        time.sleep(0.03)
        g1.targetting()
        g1.power_up()
    canv.itemconfig(screen1, text='')
    canv.delete(gun)
    root.after(750, new_game)


# Module-level game setup: build the objects the game code refers to as
# globals, start the first round and enter the Tk event loop.

# The two targets, both created on the canvas `canv`
t1 = target.target(canv)
t2 = target.target(canv)
# Canvas text item at (400, 300), created empty; its text is changed
# elsewhere in the file through canv.itemconfig(screen1, ...)
screen1 = canv.create_text(400, 300, text='', font='28')
g1 = gun()
# Counter shown in the end-of-round message as the number of shots
bullet = 0
# List handed to b.time_live(canv, balls) by the game loop
balls = []
# NOTE(review): this rebinds the name `points` to the object returned by
# points.points(canv); the previous binding (presumably the imported module)
# is no longer reachable under that name afterwards.
points = points.points(canv)

new_game(canv)

tk.mainloop()

示例#22

0

显示文件

from target import target  #importing Target Generation module
from sol import sol  #importing Solution module
from verify import verify  #importing verify module
from performance import performance  #importing performance module

if __name__ == '__main__':  # executed only when run as a script
    import sys
    import os

    # sys.argv[0] is the script itself, sys.argv[1] names the function to
    # call and the entries after it are handed to that function unchanged.
    argv = sys.argv
    command = argv[1]

    if command == 'target':
        target(argv[2], argv[3])
    elif command == 'sol':
        sol(argv[2], argv[3], argv[4])
    elif command == 'verify':
        verify(argv[2], argv[3], argv[4])
    elif command == 'performance':
        performance(argv[2])

示例#23

0

显示文件

文件： tracker_with_noise_varying_var.py 项目： gorjida/MultiSensorMutiTargetRL

            np.zeros([2, 1]), 10)
        init_location_estimate = [
            init_location_estimate[0][0], init_location_estimate[1][0]
        ]
        init_velocity_estimate = [
            6 * random.random() - 3, 6 * random.random() - 3
        ]
        init_velocity_estimate = [init_target_state[2], init_target_state[3]]

        init_estimate = init_location_estimate + init_velocity_estimate
        init_covariance = np.diag([
            MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY
        ])  #initial covariance of state estimation

        t = target(init_target_state[0:2], init_target_state[2],
                   init_target_state[3], .1, .1,
                   "CONS_V")  #constant-velocity model for target motion
        A, B = t.constant_velocity(1E-10)  #Get motion model
        x_var = t.x_var
        y_var = t.y_var

        tracker_object = EKF_tracker(init_for_smc, init_covariance, A, B,
                                     x_var, y_var,
                                     bearing_var)  #create tracker object
        #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        s = sensor("POLICY_COMM")  #create sensor object (stochastic policy)
        #s = sensor("CONS_A")
        measure = measurement(bearing_var)  #create measurement object

        m = []

示例#24

0

显示文件

# File paths to the input data
# TODO: use argparse to pass these paths in instead of hard-coding them
paths = [
    'C:\\Users\\parth\\Desktop\\Python Workspace\\Senior Design\\Data\\hiv-1-700.fixed.fst',
    'C:\\Users\\parth\\Desktop\\Python Workspace\\Senior Design\\Data\\hiv-9086-9717.fixed.fst'
]

# Generate the probes/kmers for the Microarray & seqs that represent potential seqs for CRISPR
p = probe(paths).importSequences().generate()

# Print the probes to a csv file
# p.toCSV(p.kmers[0],'C:\Users\parth\Desktop\Python Workspace\Senior Design\Data\probes.csv')

# Generate the target sequences from the reference HXB2 cell line
t = target(p.proto, paths).generate()

# Filter out target seqs that have missing bases '-' and create all possible
# valid pairs between the protospacers and the target sequences.
#-------------------------------------------------------------------------------
# Runs on the end of the LTR (t.kmers[1]); the beginning is t.kmers[0].
# The '-' filter does not depend on the spacer, so it is applied once.
validMers = [mer for mer in t.kmers[1] if '-' not in mer]

#p.proto = p.proto[0:1000]
# The columns are collected in plain lists and the frame is built once.
# The previous per-pair DataFrame.append copied the whole frame every time
# (quadratic cost) and DataFrame.append no longer exists in pandas 2.0.
targetColumn = []
gRNAColumn = []
for i, spacer in enumerate(p.proto[0]):
    if i % 1000 == 0:
        print(spacer, i)  # progress output
    targetColumn.extend(validMers)
    gRNAColumn.extend([spacer] * len(validMers))

# Same layout as before: 'target' holds the target k-mer, 'gRNA' the spacer,
# rows ordered by spacer and then by k-mer, with a fresh 0..n-1 index.
# With no valid pair this now gives an empty two-column frame.
inputSeqs = pd.DataFrame({'target': targetColumn, 'gRNA': gRNAColumn},
                         columns=['target', 'gRNA'])

示例#25

0

显示文件

文件： multi_target_multi_sensor_jpdaf.py 项目： gorjida/MultiSensorMutiTargetRL

                            [
                                x[target_counter] + np.random.normal(0, 5),
                                y[target_counter] + np.random.normal(0, 5),
                                np.random.normal(0, .1),
                                np.random.normal(0, .1)
                            ]
                        )  #init state for the tracker (tracker doesn't know about the initial state)
                    init_covariance = np.diag([
                        MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY,
                        MAX_UNCERTAINTY
                    ])  #initial covariance of state estimation
                    t = []
                    for i in range(0, num_targets):
                        t.append(
                            target(init_target_state[i][0:2],
                                   init_target_state[i][2],
                                   init_target_state[i][3], vel_var, vel_var,
                                   "CONS_V")
                        )  #constant-velocity model for target motion
                    A, B = t[0].constant_velocity(1E-10)  #Get motion model
                    Q = B.dot(np.diag([vel_var**2,
                                       vel_var**2])).dot(B.transpose())
                    D11 = ((A.transpose()).dot(Q)).dot(A)
                    D12 = -(A.transpose()).dot(Q)
                    x_var = t[0].x_var
                    y_var = t[0].y_var

                    tracker_object = []
                    for i in range(0, num_targets):
                        tracker_object.append(
                            EKF_tracker(init_for_smc[i],
                                        np.array(init_covariance), A, B, x_var,

示例#26

0

显示文件

文件： multiple_tracker_discrete_reward_with_internal_update_T210.py 项目： gorjida/MultiSensorMutiTargetRL

def run(args):
#if __name__=="__main__":
    # initialize parameters of interest
    # Method:
    # 0: linear policy
    # 1: RBF policy
    # 2: MLP policy

    #method = args[0]
    #RBF_components = args[1]
    #MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    np.random.seed(process_index+100)
    #process_index = 0
    #np.random.seed(process_index + 100)
    #vel_var = args[5]
    #num_targets = args[6]

    method = 0
    RBF_components = 20
    MLP_neurons = 50
    vel_var = .001
    num_targets = min(6,max(2,np.random.poisson(3)))
    num_targets = np.random.randint(2,10)
    #num_targets = 4


    print("Starting Thread:" + str(process_index))

    #Initialize all the parameters
    params ={0:{},1:{},2:{}}
    if method==0:
        params[0]["weight2"] = np.random.normal(0, .3, [2, num_states_layer2])
        #params[0]["weight2"] = np.array([[  3.97573312,   0.4639474 ,   2.27280486,  12.9085868 ,
         #   3.45722461,   6.36735166],
         #[-11.87940874,   2.59549414,  -5.68556954,   2.87746786,
          #  7.08059984,   5.5631133 ]])

        params[0]["weight"] = np.array([[7.18777985, -13.68815256, 1.69010242, -5.62483187,
                           -4.30451483, 10.09592853],
                         [13.33104057, 13.60537864, 3.46939294, 0.8446329,
                         -14.79733566, -4.78599648]])

        #params[0]["weight"] = np.array([[ 1.45702249, -1.17664153, -0.11593174,  1.02967173, -0.25321044,
         #0.09052774],
       #[ 0.67730786,  0.3213561 ,  0.99580938, -2.39007038, -1.16340594,
        #-1.77515938]])
    elif method==1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method==2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0,1,[MLP_neurons,1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0
    weight_saver1 = []
    weight_saver2 = []
    weight_saver2_1 = []
    weight_saver2_2 = []
    #for episode_counter in range(0,N_max):
    #Training parameters
    avg_reward = []
    avg_error = []
    var_reward = []
    training = True


    result_folder = base_path+folder_name+"/"
    reward_file = open(result_folder+"reward_noise:"+str(vel_var)+"_"+str(process_index)+  "_linear_6states.txt","a")
    error_file = open(result_folder + "error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    error_file_median = open(result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                      "a")
    var_file = open(result_folder + "var_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    var_error_file = open(result_folder + "var_error_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")
    weight_file = open(result_folder + "weight_noise:" + str(vel_var) +"_"+str(process_index)+ "_linear_6states.txt", "a")

    #flatten initial weight and store the values
    if method==0:
        weight = params[0]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp)+"\n")
    elif method==1:
        weight = params[1]['weight']
        flatted_weights = list(weight[0, :]) + list(weight[1, :])
        temp = []
        [temp.append(str(x)) for x in flatted_weights]
        weight_file.write("\t".join(temp) + "\n")
    elif method==2:
        pass

    #weight = np.reshape(np.array(weights[0]), [2, 6])
    init_max_target = 3
    num_targets = init_max_target
    while episode_counter<N_max:
        if episode_counter%1000==0 and episode_counter>0:
            init_max_target +=1
            init_max_target = min(20,init_max_target)
        
        if episode_counter%100==0 and episode_counter>0:
            num_targets = np.random.randint(3,init_max_target+1)
        sigma = gen_learning_rate(episode_counter,sigma_max,.1,5000)
        sigma = sigma_max
        discounted_return = np.array([])
        discount_vector = np.array([])
        #print(episodes_counter)
        scen = scenario(1,1)
        bearing_var = 1E-2#variance of bearing measurement
        #Target information
        x = 10000*np.random.random([num_targets])-5000#initial x-location
        y = 10000 * np.random.random([num_targets]) - 5000#initial y-location
        xdot = 10*np.random.random([num_targets])-5#initial xdot-value
        ydot = 10 * np.random.random([num_targets]) - 5#initial ydot-value

        #TEMP
        #x = [2000,-2000]
        #y = [2000,2000]
        #xdot = [1,1]
        #ydot = [-1,-1]

        init_target_state = []
        init_for_smc = []
        for target_counter in range(0,num_targets):
            init_target_state.append([x[target_counter],y[target_counter],xdot[target_counter],ydot[target_counter]])#initialize target state
            init_for_smc.append([x[target_counter]+np.random.normal(0,5),y[target_counter]
                                 +np.random.normal(0,5),np.random.normal(0,5),np.random.normal(0,5)])#init state for the tracker (tracker doesn't know about the initial state)


        #temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        #init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        #init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]
        #init_velocity_estimate = [init_target_state[2],init_target_state[3]]
        #init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY,MAX_UNCERTAINTY])#initial covariance of state estimation
        t = []
        for i in range(0,num_targets):
            t.append(target(init_target_state[i][0:2], init_target_state[i][2],
                            init_target_state[i][3], vel_var, vel_var, "CONS_V"))#constant-velocity model for target motion
        A, B = t[0].constant_velocity(1E-10)#Get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var

        tracker_object = []
        for i in range(0,num_targets):
            tracker_object.append(EKF_tracker(init_for_smc[i], np.array(init_covariance), A,B,x_var,y_var,bearing_var))#create tracker object
            #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method==0:
            s = sensor("POLICY_COMM_LINEAR")#create sensor object (stochastic policy)
        elif method==1:
            s = sensor("POLICY_COMM_RBF")
        elif method==2:
            s = sensor("POLICY_COMM_MLP")
        measure = measurement(bearing_var)#create measurement object

        m = []
        x_est = []; y_est = []; x_vel_est = []; y_vel_est = []
        x_truth = [];
        y_truth = [];
        x_vel_truth = [];
        y_vel_truth = []
        uncertainty = []
        vel_error = []
        pos_error = []
        iteration = []
        innovation = []
        for i in range(0,num_targets):
            x_truth.append([])
            y_truth.append([])
            x_vel_truth.append([])
            y_vel_truth.append([])
            uncertainty.append([])
            vel_error.append([])
            x_est.append([])
            y_est.append([])
            x_vel_est.append([])
            y_vel_est.append([])
            pos_error.append([])
            innovation.append([])
        reward = []
        episode_condition = True
        n=0
        violation = 0
        #store required information
        episode_state = []
        episode_state_out_layer = []
        episode_MLP_state = []
        episode_actions = []
        avg_uncertainty= []
        max_uncertainty = []

        while episode_condition:
            temp_m = []
            input_state_temp = []
            for i in range(0,num_targets):
                t[i].update_location()
                temp_m.append(measure.generate_bearing(t[i].current_location,s.current_location))

            m.append(temp_m)
            temp_reward = []
            target_actions = []
            for i in range(0,num_targets):
                tracker_object[i].update_states(s.current_location, m[-1][i])
                normalized_innovation = (tracker_object[i].innovation_list[-1])/tracker_object[i].innovation_var[-1]
                #print(normalized_innovation)
                #if (normalized_innovation<1E-4 or n<10) and n<200:
                    #end of episode
                current_state = list(tracker_object[i].x_k_k.reshape(len(tracker_object[i].x_k_k))) + list(s.current_location)

                #print(current_state)
                #state normalization
                x_slope = 2.0/(scen.x_max-scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)

                x_slope_sensor = 2.0 / (40000)
                y_slope_sensor = 2.0 / (40000)

                vel_slope = 2.0/(scen.vel_max-scen.vel_min)
                #normalization
                current_state[0] = -1+x_slope*(current_state[0]-scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] -scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)


                #Refactor states based on the usage
                if method==0 or method==2:
                    input_state = current_state
                    input_state_temp.append(input_state) #store input-sates
                elif method==1:
                    #Generate states for the RBF input
                    input_state =  featurizer.transform(np.array(current_state).reshape(1,len(current_state)))
                    input_state = list(input_state[0])


                target_actions.append(s.generate_action(params,input_state,.01))
                estimate = tracker_object[i].x_k_k
                episode_state.append(input_state) ####Neeed to get modified
                if method==2: episode_MLP_state.append(extra_information) #need to get modified
                truth = t[i].current_location
                x_est[i].append(estimate[0])
                y_est[i].append(estimate[1])
                x_vel_est[i].append(estimate[2])
                y_vel_est[i].append(estimate[3])
                x_truth[i].append(truth[0])
                y_truth[i].append(truth[1])
                x_vel_truth[i].append(t[i].current_velocity[0])
                y_vel_truth[i].append(t[i].current_velocity[1])
                vel_error[i].append(np.linalg.norm(estimate[2:4]-np.array([t[i].current_velocity[0],t[i].current_velocity[1]]).reshape(2,1)))
                pos_error[i].append(np.linalg.norm(estimate[0:2]-np.array(truth).reshape(2,1)))
                innovation[i].append(normalized_innovation[0])
                unormalized_uncertainty = np.sum(tracker_object[i].p_k_k.diagonal())
                #if unormalized_uncertainty>MAX_UNCERTAINTY:
                #   normalized_uncertainty = 1
                #else:
                #   normalized_uncertainty = (1.0/MAX_UNCERTAINTY)*unormalized_uncertainty
                uncertainty[i].append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)
                #if len(uncertainty[i])<window_size+window_lag:
                 #   temp_reward.append(0)
                #else:
                 #   current_avg = np.mean(uncertainty[i][-window_size:])
                  #  prev_avg = np.mean(uncertainty[i][-(window_size+window_lag):-window_lag])
                   # if current_avg<prev_avg or uncertainty[i][-1]<.1:
                    #if current_avg < prev_avg:
                    #    temp_reward.append(1)
                    #else:
                     #   temp_reward.append(0)

            this_uncertainty = []
            [this_uncertainty.append(uncertainty[x][-1]) for x in range(0, num_targets)]
            avg_uncertainty.append(np.mean(this_uncertainty))
            max_uncertainty.append(np.max(this_uncertainty))
            if len(avg_uncertainty) < window_size + window_lag:
                reward.append(0)
            else:
                current_avg = np.mean(avg_uncertainty[-window_size:])
                prev_avg = np.mean(avg_uncertainty[-(window_size + window_lag):-window_lag])
                if current_avg < prev_avg or avg_uncertainty[-1] < .1:
                    # if current_avg < prev_avg:
                    reward.append(1)
                else:
                    reward.append(0)

            #voting
            #if np.mean(temp_reward)>.5:
             #   reward.append(np.mean(temp_reward))
            #else:
             #   reward.append(np.mean(temp_reward))

            #if sum(reward)>1100 and num_targets>2: sys.exit(1)

            #Do something on target_actions
            #Create feature-vector from generated target actions

            normalized_state,index_matrix1,index_matrix2,slope = s.update_location_decentralized(target_actions,sigma,params) #Update the sensor location based on all individual actions
            #index_matrix: an n_s \times T matrix that shows the derivative of state in the output layer to the action space in the internal-layer

            backpropagated_to_internal_1 = index_matrix1.dot(np.array(input_state_temp))#8 by 6
            backpropagated_to_internal_2 = index_matrix2.dot(np.array(input_state_temp))# 8 by 6

            episode_state_out_layer.append(normalized_state)
            episode_state.append([backpropagated_to_internal_1,backpropagated_to_internal_2]) #each entry would be a T \times 6 matrix with T being the number of targets
            #reward.append(-1*uncertainty[-1])
            #update return
            discount_vector = gamma*np.array(discount_vector)
            discounted_return+= (1.0*reward[-1])*discount_vector
            new_return = 1.0*reward[-1]
            list_discounted_return = list(discounted_return)
            list_discounted_return.append(new_return)
            discounted_return = np.array(list_discounted_return)

            list_discount_vector = list(discount_vector)
            list_discount_vector.append(1)
            discount_vector = np.array(list_discount_vector)
            iteration.append(n)
            if n>episode_length: break
            n+=1

        #Based on the return from the episode, update parameters of the policy model
        #Normalize returns by the length of episode
        #if episode_counter%10==0 and episode_counter>0: print(weight_saver[-1])

        prev_params = dict(params)
        condition = True
        for i in range(0,num_targets):
            if np.mean(pos_error[i])>10000:
                condition = False
                break
                episode_condition = False
                episode_counter-=1

        if not condition:
            #print("OOPSSSS...")
            continue
        #if episode_counter%100==0 and training:
            #print("Starting the evaluation phase...")
            #training = False
            #episode_condition = False


        condition = True
        if episode_condition and training:
            normalized_discounted_return = discounted_return
            episode_actions = s.sensor_actions
            #init_weight = np.array(weight)
            rate = gen_learning_rate(episode_counter,learning_rate,1E-12,20000)
            internal_rate = gen_learning_rate(episode_counter, 3*1E-5, 1E-15, 20000)
            total_adjustment = np.zeros(np.shape(weight))
            for e in range(0,len(episode_actions)):
                #calculate gradiant
                #state = np.array(episode_state[e]).reshape(len(episode_state[e]),1)
                out_state = np.array(episode_state_out_layer[e]).reshape(len(episode_state_out_layer[e]),1)
                backpropagated_terms = episode_state[e]

                #calculate gradient
                if method==0:
                    deriv_with_out_state = (episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).transpose().dot(params[0]['weight2']) #1 by n_s==> derivative of F with respect to the output state-vector
                    internal_gradiant1 = deriv_with_out_state.dot(backpropagated_terms[0]) #1 by 6
                    internal_gradiant2 = deriv_with_out_state.dot(backpropagated_terms[1]) #1 by 6
                    internal_gradiant = np.concatenate([internal_gradiant1,internal_gradiant2])

                    #gradiant = ((episode_actions[e].reshape(2,1)-params[0]['weight'].dot(state)).dot(state.transpose()))/sigma**2#This is the gradiant
                    gradiant_out_layer = ((episode_actions[e].reshape(2, 1) - params[0]['weight2'].dot(out_state)).dot(
                        out_state.transpose())) / sigma ** 2  # This is the gradiant
                elif method==1:
                    gradiant = ((episode_actions[e].reshape(2, 1) - params[1]['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2  # This is the gradiant
                elif method==2:
                    #Gradient for MLP
                    pass

                if np.max(np.abs(gradiant_out_layer))>1E2 or np.max(np.abs(internal_gradiant))>1E2:
                    #print("OOPPSSSS...")
                    continue #clip large gradients

                if method==0:
                    adjustment_term_out_layer = gradiant_out_layer*normalized_discounted_return[e]#an unbiased sample of return
                    adjustment_term_internal_layer = internal_gradiant*normalized_discounted_return[e]
                    params[0]['weight2'] += rate * adjustment_term_out_layer
                    params[0]['weight'] += internal_rate* adjustment_term_internal_layer
                elif method==1:
                    adjustment_term = gradiant * normalized_discounted_return[e]  # an unbiased sample of return
                    params[1]['weight'] += rate * adjustment_term
                elif method==2:
                    #Gradient for MLP
                    pass

            #if not condition:
             #   weight = prev_weight
              #  continue

            episode_counter+=1
            flatted_weights1 = list(params[0]['weight'][0, :]) + list(params[0]['weight'][1, :])
            flatted_weights2 = list(params[0]['weight2'][0, :]) + list(params[0]['weight2'][1, :])
            temp1 = []
            [temp1.append(str(x)) for x in flatted_weights1]
            temp2 = []
            [temp2.append(str(x)) for x in flatted_weights2]

            weight_file.write("\t".join(temp1)+"$$$"+"\t".join(temp2)+"\n")
            #flatted_weights = list(weight[0, :]) + list(weight[1, :])
            #temp = []
            #[temp.append(str(x)) for x in flatted_weights]
            #weight_file.write("\t".join(temp)+"\n")
            weight_saver1.append(params[0]['weight'][0][0])
            weight_saver2.append(params[0]['weight'][1][0])

            weight_saver2_1.append(params[0]['weight2'][0][0])
            weight_saver2_2.append(params[0]['weight2'][1][0])
        else:
            #print("garbage trajectory: no-update")
            pass


        #if not training:
        return_saver.append(sum(reward))

        error_saver.append(np.mean(pos_error))

        #print(len(return_saver),n)
        if episode_counter%100 == 0 and episode_counter>0:
            # if episode_counter%100==0 and episode_counter>0:
            print(episode_counter, np.mean(return_saver), sigma)
            #print(params[method]['weight'])
            #weight = np.reshape(np.array(weights[episode_counter]), [2, 6])
            #print(weight)
            reward_file.write(str(np.mean(sorted(return_saver,reverse=True)[0:int(.95*len(return_saver))]))+"\n")
            error_file.write(str(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            error_file_median.write(str(np.median(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_error_file.write(str(np.var(sorted(error_saver)[0:int(.95*len(error_saver))])) + "\n")
            var_file.write(str(np.var(sorted(return_saver,reverse=True)[0:int(.95*len(return_saver))]))+"\n")
            #weight_file.write(str(np.mean(return_saver)) + "\n")

            avg_reward.append(np.mean(sorted(return_saver)[0:int(.95*len(return_saver))]))
            avg_error.append(np.mean(sorted(error_saver)[0:int(.95*len(error_saver))]))
            var_reward.append(np.var(return_saver))
            reward_file.close()
            var_file.close()
            error_file.close()
            error_file_median.close()
            var_error_file.close()
            weight_file.close()

            reward_file = open(
                result_folder + "reward_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file = open(
                result_folder + "error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_file = open(
                result_folder + "var_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            var_error_file = open(
                result_folder + "var_error_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")
            weight_file = open(
                result_folder + "weight_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt", "a")
            error_file_median = open(
                result_folder + "error_median_noise:" + str(vel_var) + "_" + str(process_index) + "_linear_6states.txt",
                "a")

            return_saver = []
            error_saver = []
        num_episodes.append(n)

示例#27

0

显示文件

    def block_check(block, DB):
        """Return True when ``block`` passes every acceptance check, else False.

        The checks run in order: basic shape, chain length, presence of the
        'target' field, accumulated difficulty, link to the previous block,
        proof-of-work against the block's own target, the expected target for
        this length, the timestamp window and finally every transaction.

        ``DB`` is forwarded to ``tools.db_get`` for the previous-block lookup
        and to the per-type transaction checkers.
        """
        def log_(txt):
            pass  #return tools.log(txt)

        def tx_check(txs):
            """Return True when some transaction is INVALID, False when all pass.

            Transactions are validated one at a time, in their original order,
            each against the list of transactions already accepted before it.
            """
            pending = copy.deepcopy(txs)
            pending.reverse()  # pop() then yields the transactions in order
            accepted = []
            error_msg = ['']
            while pending != []:
                tx = pending[-1]
                if not transactions.tx_check[tx['type']](tx, accepted,
                                                         error_msg, DB):
                    log_('bad block: ' + str(txs))
                    log_('error message: ' + str(error_msg))
                    return True  # Block is invalid
                accepted.append(pending.pop())
            return False  # Block passes this test

        if not isinstance(block, dict): return False
        if 'error' in block: return False
        if not tools.E_check(block, 'length', [int]):
            log_('no length')
            return False
        length = tools.db_get('length')
        # Exact type match: subclasses of int (e.g. bool) are rejected.
        if type(block['length']) is not int:
            log_('wrong length type')
            return False
        if int(block['length']) != int(length) + 1:
            log_('wrong longth')
            return False
        # This guard must run before the first read of block['target'] below;
        # otherwise a block without the key raises KeyError instead of being
        # rejected.
        if u'target' not in block.keys():
            log_('target error')
            return False
        if block['diffLength'] != hexSum(tools.db_get('diffLength'),
                                         hexInvert(block['target'])):
            log_('diflength error')
            return False
        if length >= 0:
            if tools.det_hash(tools.db_get(length, DB)) != block['prevHash']:
                log_('det hash error')
                return False
        half_way = tools.make_half_way(block)
        if tools.det_hash(half_way) > block['target']:
            log_('det hash error 2')
            return False
        expected_target = target.target(block['length'])
        if block['target'] != expected_target:
            log_('block: ' + str(block))
            log_('target: ' + str(expected_target))
            log_('wrong target')
            return False
        earliest = median(recent_blockthings('times', custom.mmm))
        if 'time' not in block:
            log_('no time')
            return False
        # Reject timestamps more than six minutes ahead of the local clock.
        if block['time'] > time.time() + 60 * 6:
            log_('too late')
            return False
        if block['time'] < earliest:
            log_('too early')
            return False
        if tx_check(block['txs']):
            log_('tx check')
            return False
        return True

示例#28

0

显示文件

文件： multiple_tracker_discrete_reward_with_internal_update_T210_varying_var.py 项目： gorjida/MultiSensorMutiTargetRL

            )  #init state for the tracker (tracker doesn't know about the initial state)

        #temp_loc = np.array(init_target_state[0:2]).reshape(2,1)
        #init_location_estimate = temp_loc+0*np.random.normal(np.zeros([2,1]),10)
        #init_location_estimate = [init_location_estimate[0][0],init_location_estimate[1][0]]
        #init_velocity_estimate = [6*random.random()-3,6*random.random()-3]
        #init_velocity_estimate = [init_target_state[2],init_target_state[3]]
        #init_estimate = init_location_estimate+init_velocity_estimate
        init_covariance = np.diag([
            MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY
        ])  #initial covariance of state estimation
        t = []
        for i in range(0, num_targets):
            t.append(
                target(init_target_state[i][0:2], init_target_state[i][2],
                       init_target_state[i][3], vel_var, vel_var,
                       "CONS_V"))  #constant-velocity model for target motion
        A, B = t[0].constant_velocity(1E-10)  #Get motion model
        x_var = t[0].x_var
        y_var = t[0].y_var

        tracker_object = []
        for i in range(0, num_targets):
            tracker_object.append(
                EKF_tracker(init_for_smc[i], np.array(init_covariance), A, B,
                            x_var, y_var, bearing_var))  #create tracker object
            #smc_object = smc_tracker(A,B,x_var,y_var,bearing_var,1000,np.array(init_for_smc))

        #Initialize sensor object
        if method == 0:
            s = sensor("POLICY_COMM_LINEAR"

示例#29

0

显示文件

文件： rocket_population.py 项目： shankarutthandy/coding-challenges

def setup():
    """Create the drawing surface, the target and the rocket population.

    Stores the results in the module-level names ``tget`` and ``roc``.
    NOTE(review): looks like a Processing-style ``setup`` hook -- confirm.
    """
    global roc
    global tget
    size(800, 800)
    target_position = (400, 50)
    tget = target(target_position)
    roc = population(population=5, target=tget)

示例#30

0

显示文件

文件： tracker_discrete_reward_varying_var_version2.py 项目： gorjida/MultiSensorMutiTargetRL

def run(args):
    """Train a sensor-motion policy for tracking a single target.

    Every episode simulates a fresh target, tracker and sensor. Each step earns
    a reward of 1 when the windowed tracker uncertainty is shrinking (or the
    latest normalized uncertainty is below 0.1) and 0 otherwise. After the
    episode the policy weight is moved along
    ``(action - W.state).state^T / sigma**2`` scaled by the discounted return
    of that step. Steps whose gradient has an entry above 1E2 in magnitude are
    skipped.

    Args:
        args: indexable sequence with
            args[0] method: 0 = linear policy, 1 = RBF policy, 2 = MLP policy
            args[1] RBF_components: number of RBF features (method 1)
            args[2] MLP_neurons: hidden-layer width (method 2)
            args[3] process_index: seeds ``np.random`` and tags output files
            args[4] folder_name: sub-folder of ``base_path`` for the results
            args[5] vel_var: handed to ``target`` twice and embedded in the
                output file names

    Returns:
        None. Results are appended to the text files in the result folder,
        progress is printed every 100 completed episodes, and the length of
        each completed episode is appended to the module-level
        ``num_episodes``.

    Relies on module-level configuration (``num_states``, ``rbf_var``,
    ``list_of_states``, ``base_path``, ``sigma_max``, ``N_max``,
    ``MAX_UNCERTAINTY``, ``window_size``, ``window_lag``, ``gamma``,
    ``episode_length``, ``learning_rate``) and on the project classes
    ``scenario``, ``target``, ``EKF_tracker``, ``sensor``, ``measurement`` and
    the helper ``gen_learning_rate``.

    NOTE: no gradient is implemented for the MLP policy (method 2); such a run
    simulates and logs episodes but never changes the parameters.
    """
    method = args[0]
    RBF_components = args[1]
    MLP_neurons = args[2]
    process_index = args[3]
    folder_name = args[4]
    vel_var = args[5]

    np.random.seed(process_index)
    print("Starting Thread:" + str(process_index))

    # Initialize the parameters of the selected policy.
    params = {0: {}, 1: {}, 2: {}}
    if method == 0:
        params[0]["weight"] = np.random.normal(0, .3, [2, num_states])
    elif method == 1:
        featurizer = sklearn.pipeline.FeatureUnion([("rbf1", RBFSampler(gamma=rbf_var, n_components=RBF_components, random_state=1))])
        featurizer.fit(np.array(list_of_states))  # Use this featurizer for normalization
        params[1]["weight"] = np.random.normal(0, 1, [2, RBF_components])
    elif method == 2:
        params[2]["weigh1"] = np.random.normal(0, 1, [MLP_neurons, num_states])
        params[2]["bias1"] = np.random.normal(0, 1, [MLP_neurons, 1])
        params[2]["weigh2"] = np.random.normal(0, 1, [2, MLP_neurons])
        params[2]["bias2"] = np.random.normal(0, 1, [2, 1])

    return_saver = []
    error_saver = []
    episode_counter = 0

    result_folder = base_path + folder_name + "/"

    def result_path(prefix):
        # All result files share the "<vel_var>_<process_index>" suffix.
        return (result_folder + prefix + str(vel_var) + "_" +
                str(process_index) + "_linear_6states.txt")

    # The context manager guarantees the files are closed however the
    # training loop ends.
    with open(result_path("reward_noise:"), "a") as reward_file, \
            open(result_path("error_noise:"), "a") as error_file, \
            open(result_path("error_median_noise:"), "a") as error_file_median, \
            open(result_path("var_noise:"), "a") as var_file, \
            open(result_path("var_error_noise:"), "a") as var_error_file, \
            open(result_path("weight_noise:"), "a") as weight_file:
        result_files = (reward_file, error_file, error_file_median, var_file,
                        var_error_file, weight_file)

        # Flatten the initial weight (row 0 then row 1) and store the values.
        if method == 0 or method == 1:
            weight = params[method]['weight']
            flatted_weights = list(weight[0, :]) + list(weight[1, :])
            weight_file.write("\t".join(str(x) for x in flatted_weights) + "\n")

        sigma = sigma_max
        last_annealed_at = 0  # episode count at which sigma was last lowered
        while episode_counter < N_max:
            # Lower the exploration noise every 1500 completed episodes. A
            # skipped (diverged) episode leaves episode_counter unchanged, so
            # remember the last annealing point to avoid lowering sigma twice
            # for the same count.
            if episode_counter % 1500 == 0 and episode_counter > last_annealed_at:
                sigma -= .15
                sigma = max(.1, sigma)
                last_annealed_at = episode_counter

            discounted_return = np.array([])
            discount_vector = np.array([])
            scen = scenario(1, 1)
            bearing_var = 1E-2  # variance of bearing measurement

            # Target information
            x = 10000 * random.random() - 5000  # initial x-location
            y = 10000 * random.random() - 5000  # initial y-location
            xdot = 10 * random.random() - 5  # initial xdot-value
            ydot = 10 * random.random() - 5  # initial ydot-value
            # init state for the tracker (tracker doesn't know about the initial state)
            init_for_smc = [x + np.random.normal(0, 5), y + np.random.normal(0, 5),
                            np.random.normal(0, 5), np.random.normal(0, 5)]

            # The values drawn below are never used, but the draws are kept so
            # that both random streams advance exactly as they always have.
            random.random()  # formerly the initial sensor x-position
            random.random()  # formerly the initial sensor y-position
            np.random.normal(np.zeros([2, 1]), 10)  # formerly location-estimate jitter
            random.random()  # formerly the initial x-velocity guess
            random.random()  # formerly the initial y-velocity guess

            # initial covariance of state estimation
            init_covariance = np.diag([MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY])

            t = target([x, y], xdot, ydot, vel_var, vel_var, "CONS_V")  # constant-velocity model for target motion
            A, B = t.constant_velocity(1E-10)  # Get motion model
            x_var = t.x_var
            y_var = t.y_var

            tracker_object = EKF_tracker(init_for_smc, init_covariance, A, B, x_var, y_var, bearing_var)  # create tracker object

            # Initialize sensor object
            if method == 0:
                s = sensor("POLICY_COMM_LINEAR")  # create sensor object (stochastic policy)
            elif method == 1:
                s = sensor("POLICY_COMM_RBF")
            elif method == 2:
                s = sensor("POLICY_COMM_MLP")
            measure = measurement(bearing_var)  # create measurement object

            uncertainty = []
            pos_error = []
            reward = []
            episode_state = []
            episode_MLP_state = []  # first-layer outputs, reserved for the MLP gradient
            n = 0

            while True:
                t.update_location()
                bearing = measure.generate_bearing(t.current_location, s.current_location)
                tracker_object.update_states(s.current_location, bearing)
                current_state = list(tracker_object.x_k_k.reshape(len(tracker_object.x_k_k))) + list(s.current_location)

                # Scale every state component with the scenario bounds
                # (each bound maps to -1 / +1).
                x_slope = 2.0 / (scen.x_max - scen.x_min)
                y_slope = 2.0 / (scen.y_max - scen.y_min)
                vel_slope = 2.0 / (scen.vel_max - scen.vel_min)
                current_state[0] = -1 + x_slope * (current_state[0] - scen.x_min)
                current_state[1] = -1 + y_slope * (current_state[1] - scen.y_min)
                current_state[2] = -1 + vel_slope * (current_state[2] - scen.vel_min)
                current_state[3] = -1 + vel_slope * (current_state[3] - scen.vel_min)
                current_state[4] = -1 + x_slope * (current_state[4] - scen.x_min)
                current_state[5] = -1 + y_slope * (current_state[5] - scen.y_min)

                # Refactor states based on the usage
                if method == 0 or method == 2:
                    input_state = current_state
                elif method == 1:
                    # Generate states for the RBF input
                    input_state = featurizer.transform(np.array(current_state).reshape(1, len(current_state)))
                    input_state = list(input_state[0])

                extra_information = s.update_location_new(params, input_state, sigma)
                episode_state.append(input_state)
                if method == 2:
                    episode_MLP_state.append(extra_information)  # Output of the first layer for Gradient calculation

                estimate = tracker_object.x_k_k
                truth = t.current_location
                pos_error.append(np.linalg.norm(estimate[0:2] - np.array(truth).reshape(2, 1)))
                unormalized_uncertainty = np.sum(tracker_object.p_k_k.diagonal())
                uncertainty.append((1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)

                # Reward 1 when the recent uncertainty window improved on the
                # lagged window, or when uncertainty is already small.
                if len(uncertainty) < window_size + window_lag:
                    reward.append(0)
                else:
                    current_avg = np.mean(uncertainty[-window_size:])
                    prev_avg = np.mean(uncertainty[-(window_size + window_lag):-window_lag])
                    if current_avg < prev_avg or uncertainty[-1] < .1:
                        reward.append(1)
                    else:
                        reward.append(0)

                # Update the return of every earlier step with the newest
                # reward, then open a new entry for the current step.
                latest_reward = 1.0 * reward[-1]
                discount_vector = gamma * discount_vector
                discounted_return = np.append(
                    discounted_return + latest_reward * discount_vector, latest_reward)
                discount_vector = np.append(discount_vector, 1)

                if n > episode_length: break
                n += 1

            # Diverged track: discard the episode without counting it.
            if np.mean(pos_error) > 10000:
                continue

            # Based on the return from the episode, update parameters of the policy model
            episode_actions = s.sensor_actions
            rate = gen_learning_rate(episode_counter, learning_rate, 1E-8, 10000)
            if method == 0 or method == 1:
                policy = params[method]
                for e in range(len(episode_actions)):
                    state = np.array(episode_state[e]).reshape(len(episode_state[e]), 1)
                    gradiant = ((episode_actions[e].reshape(2, 1) - policy['weight'].dot(state)).dot(
                        state.transpose())) / sigma ** 2
                    if np.max(np.abs(gradiant)) > 1E2:
                        continue  # drop steps with very large gradients
                    adjustment_term = gradiant * discounted_return[e]  # an unbiased sample of return
                    policy['weight'] += rate * adjustment_term
            # method == 2: gradient for MLP is not implemented, nothing to update.

            episode_counter += 1

            return_saver.append(sum(reward))
            error_saver.append(np.mean(pos_error))

            if episode_counter % 100 == 0 and episode_counter > 0:
                print(episode_counter, np.mean(return_saver), sigma)
                # Statistics use the lowest 95% of the sorted values.
                kept_returns = sorted(return_saver)[0:int(.95 * len(return_saver))]
                kept_errors = sorted(error_saver)[0:int(.95 * len(error_saver))]
                reward_file.write(str(np.mean(kept_returns)) + "\n")
                error_file.write(str(np.mean(kept_errors)) + "\n")
                error_file_median.write(str(np.median(kept_errors)) + "\n")
                var_error_file.write(str(np.var(kept_errors)) + "\n")
                var_file.write(str(np.var(kept_returns)) + "\n")

                # Push the results to disk so a long run can be inspected
                # while it is still going.
                for handle in result_files:
                    handle.flush()

                return_saver = []
                error_saver = []
            num_episodes.append(n)

示例#31

0

显示文件

# Script fragment: prepares a fixed test image, the output folder, the model
# and its optimizer, then starts the epoch/batch loop. The loop body shown
# here only loads frames and slices them; the rest is not part of this excerpt.
list_len = len(image_list)

# Fixed test input: one image run through prepare_dataset, cast to float32 and
# reshaped to (1, 3, 64, 64).
image_name = image_path + 'tenka_1.png'
test = prepare_dataset(image_name)
test = chainer.as_variable(xp.array(test).astype(xp.float32)).reshape(
    1, 3, 64, 64)

# Create the output directory on first use.
outdir = "./output/"
if not os.path.exists(outdir):
    os.mkdir(outdir)

# Model moved to the GPU, plus the optimizer built for it.
encdec = EncDec()
encdec.to_gpu()
ed_opt = set_optimizer(encdec)

# NOTE: this rebinds the name `target` from the callable to the object it
# returns, so `target()` cannot be called again after this line.
target = target()

for epoch in range(epochs):
    sum_gen_loss = 0
    for batch in range(0, iterations, framesize):
        # Load `framesize` consecutively numbered images starting at a random
        # index drawn from [1, 1000 - framesize).
        frame_box = []
        rnd = np.random.randint(1, 1000 - framesize)
        for index in range(framesize):
            filename = image_path + "tenka_" + str(rnd + index) + ".png"
            frame = prepare_dataset(filename)
            frame_box.append(frame)

        frames = chainer.as_variable(xp.array(frame_box).astype(xp.float32))

        # x drops the last frame and t drops the first, so x[i] and t[i] are
        # consecutive frames -- presumably input / next-frame pairs; confirm.
        x = frames[0:framesize - 1]
        t = frames[1:framesize]

示例#32

0

显示文件

文件： api.py 项目： AltCoinsLand/basiccoin

def difficulty(DB, args):
    """Return the result of ``target.target(DB)``.

    ``args`` is accepted but not used.
    """
    current_target = target.target(DB)
    return current_target
def my_balance(DB, args, address='default'):

示例#33

0

显示文件

def run(args):
    """Sweep a grid of sensor settings and record the one with lowest error.

    For every integer pair ``(xdot_sensor, ydot_sensor)`` in
    ``[-15, 15] x [-15, 15]`` the function simulates ``N_max`` tracking
    episodes with a fixed, randomly drawn linear policy weight matrix.
    Each episode creates a randomly initialised constant-velocity target, an
    EKF tracker fed with bearing measurements, and a sensor constructed with
    ``[xdot_sensor, ydot_sensor]`` and ``heading_rate``.  Per pair, the mean
    and variance of the best 95% of per-episode mean position errors and the
    mean of the best 95% of per-episode rewards are stored.

    Args:
        args: Indexable with three items:
            ``args[0]`` - ``vel_var``, passed to ``target`` as both velocity
            variance arguments;
            ``args[1]`` - ``heading_rate``, passed to ``sensor`` and used in
            the output file name;
            ``args[2]`` - experiment folder name under ``base_path``.

    Side effects:
        Prints the heading rate and writes
        ``<base_path>/<experiment_folder_name>/best_data_<heading_rate>``
        containing the minimum error and the grid key that produced it.

    Returns:
        None.
    """
    vel_var = args[0]
    heading_rate = args[1]
    experiment_folder_name = args[2]
    result_path = (base_path + "/" + experiment_folder_name + "/best_data_" +
                   str(heading_rate))

    # The file is still opened (and truncated) before the simulation starts,
    # so a bad path fails immediately; the context manager guarantees the
    # handle is closed even when an episode raises.
    with open(result_path, "w") as result_file:
        # initialize actor parameters
        MAX_UNCERTAINTY = 1E9

        num_states = 6
        # Policy weights are drawn once and never updated in this function.
        weight = np.random.normal(0, 1, [2, num_states])

        sigma_max = 1
        num_episodes = []
        gamma = .99

        episode_length = 1500
        learning_rate = 1E-3
        min_learning_rate = 1E-6
        N_max = 200

        window_size = 50
        window_lag = 10

        weight_saver1 = []
        weight_saver2 = []

        total_error = {}
        total_error_variance = {}
        total_reward = {}
        # Number of best episodes kept per grid point (drops the worst 5%).
        kept = int(.95 * N_max)

        print("heading-rate=" + str(heading_rate))
        for xdot_sensor in np.arange(-15, 16, 1):
            for ydot_sensor in np.arange(-15, 16, 1):

                episode_counter = 0
                avg_reward = []
                var_reward = []
                error_saver = []
                # Reset together with error_saver so the reward statistic
                # below only covers episodes of this grid point.
                return_saver = []
                while episode_counter < N_max:
                    sigma = gen_learning_rate(episode_counter, sigma_max, .1,
                                              5000)
                    # The schedule value is overridden: the policy noise is
                    # held at sigma_max for every episode.
                    sigma = sigma_max
                    discounted_return = np.array([])
                    discount_vector = np.array([])
                    scen = scenario(1, 1)
                    bearing_var = 1E-2  # variance of bearing measurement
                    # Target information
                    x = 10000 * random.random() - 5000  # initial x-location
                    y = 10000 * random.random() - 5000  # initial y-location
                    xdot = 10 * random.random() - 5  # initial xdot-value
                    ydot = 10 * random.random() - 5  # initial ydot-value

                    # initialize target state
                    init_target_state = [x, y, xdot, ydot]
                    # init state for the tracker (tracker doesn't know about
                    # the initial state)
                    init_for_smc = [
                        x + np.random.normal(0, 5),
                        y + np.random.normal(0, 5),
                        np.random.normal(0, 5),
                        np.random.normal(0, 5)
                    ]
                    # Not passed to the sensor below (it is created at
                    # [0, 0]); kept so the sequence of random draws is
                    # unchanged.
                    init_sensor_state = [
                        10000 * random.random() - 5000,
                        10000 * random.random() - 5000, 3, -2
                    ]

                    temp_loc = np.array(init_target_state[0:2]).reshape(2, 1)
                    # The noise term is multiplied by 0, so the location
                    # estimate equals the true initial location.
                    init_location_estimate = temp_loc + 0 * np.random.normal(
                        np.zeros([2, 1]), 10)
                    init_location_estimate = [
                        init_location_estimate[0][0],
                        init_location_estimate[1][0]
                    ]
                    # This random estimate is immediately replaced by the
                    # true velocity; the two draws are kept so the `random`
                    # stream advances exactly as before.
                    init_velocity_estimate = [
                        6 * random.random() - 3, 6 * random.random() - 3
                    ]
                    init_velocity_estimate = [
                        init_target_state[2], init_target_state[3]
                    ]

                    init_estimate = (init_location_estimate +
                                     init_velocity_estimate)
                    # initial covariance of state estimation
                    init_covariance = np.diag([
                        MAX_UNCERTAINTY, MAX_UNCERTAINTY, MAX_UNCERTAINTY,
                        MAX_UNCERTAINTY
                    ])

                    # constant-velocity model for target motion
                    t = target(init_target_state[0:2], init_target_state[2],
                               init_target_state[3], vel_var, vel_var,
                               "CONS_V")
                    A, B = t.constant_velocity(1E-10)  # Get motion model
                    x_var = t.x_var
                    y_var = t.y_var

                    # create tracker object
                    tracker_object = EKF_tracker(init_for_smc,
                                                 init_covariance, A, B, x_var,
                                                 y_var, bearing_var)

                    # create sensor object (stochastic policy)
                    s = sensor("CONS_V", [0, 0], [xdot_sensor, ydot_sensor],
                               heading_rate)
                    # create measurement object
                    measure = measurement(bearing_var)

                    # Per-step traces; only pos_error and reward feed the
                    # statistics computed in this function.
                    m = []
                    x_est = []
                    y_est = []
                    x_vel_est = []
                    y_vel_est = []
                    x_truth = []
                    y_truth = []
                    x_vel_truth = []
                    y_vel_truth = []
                    uncertainty = []

                    vel_error = []
                    pos_error = []
                    iteration = []
                    innovation = []

                    reward = []
                    episode_condition = True
                    n = 0
                    violation = 0
                    # store required information
                    episode_state = []
                    episode_actions = []

                    # episode_condition is never cleared; the loop ends via
                    # the break on the step counter below.
                    while episode_condition:
                        # update location of target and sensor + generate
                        # new measurement; also, run tracker object
                        t.update_location()
                        m.append(
                            measure.generate_bearing(t.current_location,
                                                     s.current_location))
                        tracker_object.update_states(s.current_location,
                                                     m[-1])

                        normalized_innovation = (
                            tracker_object.innovation_list[-1]
                        ) / tracker_object.innovation_var[-1]

                        # create state-vector: flattened tracker estimate
                        # followed by the sensor location
                        current_state = list(
                            tracker_object.x_k_k.reshape(
                                len(tracker_object.x_k_k))) + list(
                                    s.current_location)

                        # state normalization: map each component linearly
                        # from [min, max] to [-1, 1]
                        x_slope = 2.0 / (scen.x_max - scen.x_min)
                        y_slope = 2.0 / (scen.y_max - scen.y_min)
                        vel_slope = 2.0 / (scen.vel_max - scen.vel_min)
                        current_state[0] = -1 + x_slope * (
                            current_state[0] - scen.x_min)
                        current_state[1] = -1 + y_slope * (
                            current_state[1] - scen.y_min)
                        current_state[2] = -1 + vel_slope * (
                            current_state[2] - scen.vel_min)
                        current_state[3] = -1 + vel_slope * (
                            current_state[3] - scen.vel_min)
                        current_state[4] = -1 + x_slope * (
                            current_state[4] - scen.x_min)
                        current_state[5] = -1 + y_slope * (
                            current_state[5] - scen.y_min)
                        s.update_location(weight, sigma,
                                          np.array(current_state))
                        estimate = tracker_object.x_k_k
                        episode_state.append(current_state)

                        truth = t.current_location
                        x_est.append(estimate[0])
                        y_est.append(estimate[1])
                        x_vel_est.append(estimate[2])
                        y_vel_est.append(estimate[3])

                        x_truth.append(truth[0])
                        y_truth.append(truth[1])

                        x_vel_truth.append(t.current_velocity[0])
                        y_vel_truth.append(t.current_velocity[1])

                        vel_error.append(
                            np.linalg.norm(estimate[2:4] - np.array([
                                t.current_velocity[0], t.current_velocity[1]
                            ]).reshape(2, 1)))
                        pos_error.append(
                            np.linalg.norm(estimate[0:2] -
                                           np.array(truth).reshape(2, 1)))
                        innovation.append(normalized_innovation[0])

                        # Trace of the tracker covariance, scaled by
                        # MAX_UNCERTAINTY.
                        unormalized_uncertainty = np.sum(
                            tracker_object.p_k_k.diagonal())
                        uncertainty.append(
                            (1.0 / MAX_UNCERTAINTY) * unormalized_uncertainty)

                        # Reward is 1 when the mean uncertainty over the last
                        # window_size steps is below the mean of the window
                        # ending window_lag steps earlier, or when the latest
                        # uncertainty is under .1; it is 0 otherwise and
                        # until enough samples exist.
                        if len(uncertainty) < window_size + window_lag:
                            reward.append(0)
                        else:
                            current_avg = np.mean(uncertainty[-window_size:])
                            prev_avg = np.mean(
                                uncertainty[-(window_size +
                                              window_lag):-window_lag])
                            if current_avg < prev_avg or uncertainty[-1] < .1:
                                reward.append(1)
                            else:
                                reward.append(0)

                        # update return: age the existing discount factors
                        # by one step, credit the new reward to every earlier
                        # step, then append the entry for the current step.
                        discount_vector = gamma * np.array(discount_vector)

                        discounted_return += (
                            1.0 * reward[-1]) * discount_vector
                        new_return = 1.0 * reward[-1]
                        list_discounted_return = list(discounted_return)
                        list_discounted_return.append(new_return)
                        discounted_return = np.array(list_discounted_return)

                        list_discount_vector = list(discount_vector)
                        list_discount_vector.append(1)
                        discount_vector = np.array(list_discount_vector)

                        iteration.append(n)
                        if n > episode_length:
                            break
                        n += 1
                    num_episodes.append(n)
                    error_saver.append(np.mean(pos_error))
                    return_saver.append(sum(reward))
                    episode_counter += 1

                config_key = str(xdot_sensor) + "|" + str(ydot_sensor)
                total_error[config_key] = np.mean(
                    sorted(error_saver)[0:kept])
                total_reward[config_key] = np.mean(
                    sorted(return_saver, reverse=True)[0:kept])
                total_error_variance[config_key] = np.var(
                    sorted(error_saver)[0:kept])

        # Grid point with the smallest trimmed mean position error.
        sorted_error = sorted(total_error.items(), key=operator.itemgetter(1))
        key = sorted_error[0][0]

        result_file.write("Min Error=" + str(sorted_error[0][1]) + "\n")
        result_file.write("Best params=" + str(key) + "\n")