Example #1
    def test(self, test_num):
        """train method.
        """
        saver = tf.train.Saver()
        saver.restore(self.sess, "./save/model.ckpt")

        dr = []
        for i in range(test_num):
            acc_r = [0]

            observation = self.env.reset(self.testRainData[i])
            #print('obtest=',observation)
            while True:
                # if total_steps-MEMORY_SIZE > 9000: env.render()

                action = self.choose_action(observation)

                f_action = (action - (self.n_actions - 1) / 2) / (
                    (self.n_actions) / 4)  # [-2 ~ 2] float actions
                observation_, reward, done, info = self.env.step(
                    np.array([f_action]), self.testRainData[i])
                #print(observation_, reward, done, info)
                reward /= 10  # normalize to a range of (-1, 0)
                acc_r.append(reward + acc_r[-1])  # accumulated reward

                #self.store_transition(observation, action, reward, observation_)

                observation = observation_
                #print('obtest=',observation)
                if done:
                    dr.append(acc_r)
                    break

            # Compare with the HC baseline
            change_rain.copy_result('./test_result/HC/compare_tem_HC' +
                                    str(i) + '.inp',
                                    self.env.orf_rain + '.inp')  # restore
            tem_etime = self.env.date_time[self.env.iten]
            set_datetime.set_date(
                self.env.sdate, self.env.edate, self.env.stime, tem_etime,
                './test_result/HC/compare_tem_HC' + str(i) + '.inp')
            self.env.simulation('./test_result/HC/compare_tem_HC' + str(i) +
                                '.inp')

            #history['episode'].append(i)
            #history['Episode_reward'].append(episode_reward)
            #print('Episode: {} | Episode reward: {:.2f}'.format(i, episode_reward))
            sout = './test_result/DDQN/DDQN_' + str(i) + '.rpt'
            sin = self.env.staf + '.rpt'
            change_rain.copy_result(sout, sin)
            #self.env.copy_result(sout,sin)

        return dr
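
The f_action line above maps a discrete action index onto a symmetric continuous range (roughly [-2, 2], as the inline comment notes). A minimal sketch of that mapping, using n_actions = 11 as an illustrative value (the real value comes from self.n_actions):

def discrete_to_continuous(action, n_actions):
    # Center the index around zero, then scale; the endpoints approach
    # +/-2 as n_actions grows, since 2 * (n_actions - 1) / n_actions -> 2.
    return (action - (n_actions - 1) / 2) / (n_actions / 4)

n_actions = 11  # illustrative; not necessarily the value used above
print([round(discrete_to_continuous(a, n_actions), 2) for a in range(n_actions)])
# [-1.82, -1.45, -1.09, -0.73, -0.36, 0.0, 0.36, 0.73, 1.09, 1.45, 1.82]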
Example #2
    def reset(self, raindata):
        # Each batch generates a new rainfall event.
        # Each reset assigns a new rainfall series and a new pump sequence.
        
#        set_datetime.set_date(self.sdate,self.edate,self.stime,self.etime,self.staf+'.inp')
#        A=random.randint(100,150)
#        C=random.randint(3,9)/10.00
#        P=random.randint(1,5)
#        b=12
#        n=0.77
#        R=random.randint(3,7)/10.00
#        self.rain=change_rain.gen_rain(self.date_t[-1],A,C,P,b,n,R,self.deltt)
        change_rain.change_rain(raindata, self.orf_rain + '.inp')  # modify self.orf_rain first, then copy it to staf
        change_rain.copy_result(self.staf + '.inp', self.orf_rain + '.inp')
        change_rain.copy_result(self.orftem + '.inp', self.orf_rain + '.inp')

        change_rain.copy_result(self.GA_tem + '.inp', self.orf_rain + '.inp')  # copy the rain-modified self.orf_rain.inp to the GAfile.inp
        
        self.iten=1
        self.action_seq=[]
        
        tem_etime=self.date_time[self.iten]
        set_datetime.set_date(self.sdate,self.edate,self.stime,tem_etime,self.staf+'.inp')
        
        '''
        # Compare with the HC baseline
        change_rain.copy_result('compare_tem_HC.inp',self.orf_rain+'.inp')  # restore
        set_datetime.set_date(self.sdate,self.edate,self.stime,tem_etime,'compare_tem_HC.inp')
        self.simulation('compare_tem_HC.inp')
        '''
        # collect all pumps
        pumps=[]
        for pool in self.pool_list:        
            for item in self.pump_list[pool]:
                pumps.append(item)


        # GA comparison: generate the GA policy and per-step flooding data over the rainfall period; the flooding data serve as the baseline for each DDQN simulation step
        self.GA_action_seq=GA_sim.GA_sim(self.GA_tem,self.GAfile,self.crossRate,self.mutationRate,self.lifeCount,self.date_time,pumps,self.GAStepNum)
        self.GA_flooding_step=GA_get_flooding_step.GA_get_flooding_step(self.GA_action_seq,self.GA_tem,self.GAStepfile,self.date_time,pumps)
         
        self.simulation(self.staf+'.inp')
     
        total_in,flooding,store,outflow,upflow,downflow=get_rpt.get_rpt(self.staf+'.rpt')
        # Before deciding pump on/off, read the current (final-step) forebay water level; pumps stay off when the level is too low
        self.pool_d=get_output.depth(self.staf+'.out',self.pool_list,self.date_t[self.iten]-self.iten)
        rain_sum=sum(raindata[self.date_t[self.iten]:self.date_t[self.iten+1]])/max(raindata)
        
        for pool in self.pool_list:
            # NOTE: state is rebuilt on each iteration, so only the last pool's state is returned
            state=np.array([outflow/total_in,flooding/total_in,store/total_in,self.pool_d[pool],rain_sum])
        
        return state
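
The returned state packs five normalized features: outflow, flooding, and storage as fractions of total inflow, the forebay depth, and the rainfall over the next interval scaled by the event maximum. A standalone sketch of the same construction, with hypothetical numbers standing in for the get_rpt/get_output readings:

import numpy as np

def build_state(total_in, flooding, store, outflow, pool_depth, rain_sum):
    # same five-element layout as reset() and step() above
    return np.array([outflow / total_in, flooding / total_in,
                     store / total_in, pool_depth, rain_sum])

# hypothetical readings: 120 units in, 6 flooded, 30 stored, 84 out
print(build_state(120.0, 6.0, 30.0, 84.0, pool_depth=2.4, rain_sum=0.35))
# [0.7  0.05 0.25 2.4  0.35]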
Example #3
    def test(self, test_num):
        """Test method: restore the trained actor-critic model and evaluate it on the test rainfall data.
        """
        # load the saved model
        saver=tf.train.Saver()
        saver.restore(self.sess,"./save/model.ckpt")
        
        dr=[]
        for i in range(test_num):
            print('test'+str(i))
            s =self.env.reset(self.testRainData[i])
            t = 0
            track_r = []
            while True:
                a = self.actor.choose_action(s)
        
                s_, r, done, info = self.env.step(a,self.testRainData[i])
        
                #if done: r = -20
        
                track_r.append(r)
        
                # NOTE: the critic and actor are still updated here, so evaluation is not pure inference
                td_error = self.critic.learn(s, r, s_)  # gradient = grad[r + gamma * V(s_) - V(s)]
                self.actor.learn(s, a, td_error)  # true_gradient = grad[logPi(s,a) * td_error]
        
                s = s_
                t += 1
                if done:
                    dr.append(track_r)
                    break
                
            # Compare with the HC baseline
            change_rain.copy_result('./test_result/HC/compare_tem_HC'+str(i)+'.inp',self.env.orf_rain+'.inp')  # restore
            tem_etime=self.env.date_time[self.env.iten]
            set_datetime.set_date(self.env.sdate,self.env.edate,self.env.stime,tem_etime,'./test_result/HC/compare_tem_HC'+str(i)+'.inp')
            self.env.simulation('./test_result/HC/compare_tem_HC'+str(i)+'.inp')

            #history['episode'].append(i)
            #history['Episode_reward'].append(episode_reward)
            #print('Episode: {} | Episode reward: {:.2f}'.format(i, episode_reward))
            sout='./test_result/'+str(i)+'.rpt'
            sin=self.env.staf+'.rpt'
            change_rain.copy_result(sout,sin)
            #self.env.copy_result(sout,sin)

        return dr
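
The inline comments in the loop spell out the actor-critic update: the critic learns from the TD error r + gamma * V(s_) - V(s), and the actor weights its log-probability gradient by that error. A minimal numeric sketch of the TD error, with hypothetical value estimates and gamma = 0.9 as an assumed discount factor:

def td_error(r, v_s, v_s_next, gamma=0.9):
    # gradient = grad[r + gamma * V(s_) - V(s)], as in the comment above
    return r + gamma * v_s_next - v_s

# hypothetical critic outputs for the current and next state
print(td_error(r=0.5, v_s=1.0, v_s_next=1.2))  # 0.5 + 0.9*1.2 - 1.0 = 0.58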
Example #4
        b = 12
        n = 0.77
        R = random.randint(2, 8) / 10.00
        rain = change_rain.gen_rain(date_t[-1], A, C, P, b, n, R, deltt)

        bcfile = './boundary condition/otbc'
        bcstartfile = './boundary condition/ot'
        change_rain.copy_result(bcstartfile + '.inp', bcfile + '.inp')

        change_rain.change_rain(rain, bcfile + '.inp')
        #print(A,C,P,b,n,R)

        sdate = edate = '08/28/2015'
        stime = date_time[0]
        etime = date_time[-1]
        set_datetime.set_date(sdate, edate, stime, etime, bcfile + '.inp')

        simulation(bcfile + '.inp')
        filename = bcfile + '.out'
        sub, node, link, sub_name, node_name, link_name = get_output.read_out(
            filename)
        # the boundary conditions are stored in the node results
        inflow_data = bc_data(node, 'inflow')
        flooding_data = bc_data(node, 'flooding')
        # initial conditions
        init_data = ic_data(node, link)
        #print(len(init_data))

        mf_result = []
        mf_result.append(init_data)
Example #5
    def test(self):
        # run the simulation repeatedly over the test rainfall set
        saver = tf.train.Saver()
        saver.restore(self.sess, "./save/model.ckpt")
        etime = date_time[1]
        xs, ys, drs = [], [], []
        pumps = []
        for pool in self.pool_list:  # each pool is a forebay
            for item in self.pump_list[pool]:
                pumps.append(item)

        for iten in range(len(self.testRainData)):
            reward_sum = 0

            change_rain.copy_result(self.startfile + '.inp',
                                    'arg-original.inp')
            change_rain.copy_result(self.infile + '.inp', 'arg-original.inp')

            rainData = self.testRainData[iten]

            change_rain.change_rain(rainData, self.startfile + '.inp')
            self.simulation(self.infile + '.inp')

            change_rain.copy_result(
                self.GA_tem + '.inp', self.startfile +
                '.inp')  # copy the rain-modified start input file to the GA_tem file
            # GA comparison: generate the GA policy and per-step flooding data; the flooding data serve as the baseline for each DDQN simulation step
            self.GA_action_seq = GA_sim.GA_sim(self.GA_tem, self.GAfile,
                                               self.crossRate,
                                               self.mutationRate,
                                               self.lifeCount, self.date_time,
                                               self.pumps, self.GAStepNum)
            self.GA_flooding_step = GA_get_flooding_step.GA_get_flooding_step(
                self.GA_action_seq, self.GA_tem, self.GAStepfile,
                self.date_time, self.pumps)

            begin = datetime.datetime.now()

            change_rain.copy_result(
                './sim/test/result/en/inp/' + str(iten) + '.inp',
                self.startfile + '.inp')
            change_rain.copy_result(
                './sim/test/result/en/rpt/' + str(iten) + '.rpt',
                self.startfile + '.rpt')

            set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                                  self.startfile + '.inp')

            change_rain.copy_result(self.infile + '.inp',
                                    self.startfile + '.inp')

            self.simulation(self.infile + '.inp')
            # read the rpt info to produce the next action
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                self.infile + '.rpt')
            # Before deciding pump on/off, read the current (final-step) forebay water level; pumps stay off when the level is too low
            self.pool_d = get_output.depth(self.infile + '.out',
                                           self.pool_list, self.date_t[1])

            action_seq = []
            log_reward = ''

            for i in range(1, len(self.date_t) - 1):  # use one rainfall event to generate results; inp files for batch_size events are generated at random
                rain_sum = sum(
                    rainData[self.date_t[i]:self.date_t[i +
                                                        1]]) / max(rainData)

                action = []

                for pool in self.pool_list:
                    observation = [
                        outflow / total_in, flooding / total_in,
                        store / total_in, self.pool_d[pool], rain_sum
                    ]
                    x_in = observation
                    x = np.reshape(x_in, [1, self.D])
                    tfprob = self.sess.run(self.probability,
                                           feed_dict={self.observations: x})

                    # initialize flage to 0, then set it according to the water level
                    flage = 0

                    if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
                        # level between the minimum pump-on level and the maximum level
                        flage = 0
                    elif self.pool_d[pool] < self.limit_level[pool][0]:
                        # level below the minimum pump-on level
                        flage = -1
                    else:
                        flage = 1

                    # pump start/stop policy
                    if flage == 0:
                        if tfprob < self.action_space[1] + 0.1:
                            # probability below the low threshold: keep both pumps off
                            action.append(0)
                            action.append(0)
                            a = 0
                        elif tfprob < self.action_space[1] + 0.6:
                            # probability in the middle band: run one pump
                            action.append(0)
                            action.append(1)
                            a = 1
                        else:
                            # run both pumps
                            action.append(1)
                            action.append(1)
                            a = 1
                    elif flage == -1:
                        # below the minimum level: keep pumps off
                        action.append(0)
                        action.append(0)
                        a = 0
                    else:
                        # flage == 1: above the maximum level, run all pumps
                        action.append(1)
                        action.append(1)
                        a = 1

                    xs.append(x)
                    y = 1 - a
                    ys.append(y)

                # the reward is only available after setting the pumps and simulating
                action_seq.append(action)

                #stime=date_time[i]
                etime = self.date_time[i + 1]
                set_datetime.set_date(self.sdate, self.edate, self.stime,
                                      etime, self.startfile + '.inp')
                change_rain.copy_result(self.infile + '.inp',
                                        self.startfile + '.inp')
                set_pump.set_pump(action_seq, self.date_time[1:i + 1], pumps,
                                  self.infile + '.inp')

                self.simulation(self.infile + '.inp')
                #change_rain.copy_result('check'+str(i)+'.inp',infile+'.inp')
                # read the rpt info to produce the next action
                total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                    self.infile + '.rpt')
                # Before deciding pump on/off, read the current (final-step) forebay water level; pumps stay off when the level is too low
                self.pool_d = get_output.depth(self.infile + '.out',
                                               self.pool_list, date_t[i] - i)

                # Reward: currently only overflow (flooding) pollution control is considered
                for pool in self.pool_list:
                    reward_sum += (self.GA_flooding_step[i] - flooding) / (
                        0.0001 + total_in)  # the GA flooding data serve as the per-step baseline
                    drs.append((self.GA_flooding_step[i] - flooding) /
                               (0.0001 + total_in))

                log_reward += str(reward_sum) + '\n'

            end = datetime.datetime.now()
            print(iten, '  ', end - begin)
            with open('reward' + str(iten) + '.txt', 'w') as f:
                f.write(log_reward)
            # save the inp and rpt files
            change_rain.copy_result(
                './sim/test/result/ai/inp/' + str(iten) + '.inp',
                './sim/test/oti.inp')
            change_rain.copy_result(
                './sim/test/result/ai/rpt/' + str(iten) + '.rpt',
                './sim/test/oti.rpt')
            print("操控序列:", action_seq)
            print("得分:", reward_sum)
Example #6
    def train(self):
        xs, ys, drs = [], [], []
        rendering = False
        init = tf.global_variables_initializer()
        self.sess.run(init)
        gradBuffer = self.sess.run(self.tvars)

        for ix, grad in enumerate(gradBuffer):
            gradBuffer[ix] = grad * 0

        etime = date_time[1]
        episode_number = 0
        print(len(self.trainRainData))

        pumps = []

        for pool in self.pool_list:  # each pool is a forebay
            for item in self.pump_list[pool]:
                pumps.append(item)

        while episode_number < len(self.trainRainData):
            reward_sum = 0

            rainData = self.trainRainData[episode_number]

            s1 = datetime.datetime.now()

            change_rain.copy_result(
                self.startfile + '.inp', 'arg-original.inp'
            )  # copy the original input file arg-original.inp to the start input file; each reset rewrites its rainfall data
            change_rain.copy_result(
                self.infile + '.inp',
                'arg-original.inp')  # initialize an infile; infile is the input file used by each simulation

            change_rain.change_rain(rainData, self.startfile +
                                    '.inp')  # write the rainfall data into the start inp file
            #print(A,C,P,b,n,R)

            change_rain.copy_result(self.GA_tem + '.inp', self.startfile +
                                    '.inp')  # copy the rain-modified input file to the GA_tem file
            # GA comparison: generate the GA policy and per-step flooding data; the flooding data serve as the baseline for each DDQN simulation step
            self.GA_action_seq = GA_sim.GA_sim(self.GA_tem, self.GAfile,
                                               self.crossRate,
                                               self.mutationRate,
                                               self.lifeCount, self.date_time,
                                               pumps, self.GAStepNum)
            self.GA_flooding_step = GA_get_flooding_step.GA_get_flooding_step(
                self.GA_action_seq, self.GA_tem, self.GAStepfile,
                self.date_time, pumps)

            # first simulate the initial interval (10 min)
            set_datetime.set_date(self.sdate, self.edate, self.stime, etime,
                                  self.startfile +
                                  '.inp')  # update the time settings in the start input file

            change_rain.copy_result(self.infile + '.inp',
                                    self.startfile + '.inp')  # copy the rain-modified start inp file to infile
            self.simulation(self.infile + '.inp')

            # read the rpt info to produce the next action
            total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                self.infile + '.rpt')

            self.pool_d = get_output.depth(self.infile + '.out',
                                           self.pool_list, self.date_t[1])

            action_seq = []

            for i in range(1, len(self.date_t) - 1):  # iterate over each step of the rainfall event
                rain_sum = sum(
                    rainData[self.date_t[i]:self.date_t[i +
                                                        1]]) / max(rainData)

                action = []
                for pool in self.pool_list:  # each pool is a forebay

                    observation = [
                        outflow / (0.001 + total_in),
                        flooding / (0.001 + total_in),
                        store / (0.001 + total_in), self.pool_d[pool], rain_sum
                    ]
                    x_in = observation
                    x = np.reshape(x_in, [1, self.D])
                    tfprob = self.sess.run(self.probability,
                                           feed_dict={self.observations:
                                                      x})  # pump on/off probability

                    # initialize flage to 0, then set it according to the water level
                    flage = 0

                    if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
                        # above the minimum and below the maximum level: flage stays 0
                        flage = 0
                    elif self.pool_d[pool] < self.limit_level[pool][0]:
                        # level below the minimum pump-on level
                        flage = -1
                    else:
                        flage = 1

                    # pump start/stop policy
                    if flage == 0:
                        if tfprob < self.action_space[1] + 0.1:
                            # probability below the low threshold: keep both pumps off
                            action.append(0)
                            action.append(0)
                            a = 0
                        elif tfprob < self.action_space[1] + 0.6:
                            # probability in the middle band: run one pump
                            action.append(0)
                            action.append(1)
                            a = 1
                        else:
                            # run both pumps
                            action.append(1)
                            action.append(1)
                            a = 1
                    elif flage == -1:
                        # below the minimum level: keep pumps off
                        action.append(0)
                        action.append(0)
                        a = 0
                    else:
                        # flage == 1: above the maximum level, run all pumps
                        action.append(1)
                        action.append(1)
                        a = 1

                    xs.append(x)
                    y = 1 - a
                    ys.append(y)

                # append the actions for all pumps
                action_seq.append(action)
                #print(action_seq)

                #stime=date_time[i]
                etime = date_time[i + 1]
                set_datetime.set_date(self.sdate, self.edate, self.stime,
                                      etime, self.startfile + '.inp')
                change_rain.copy_result(self.infile + '.inp',
                                        self.startfile + '.inp')
                set_pump.set_pump(action_seq, date_time[1:i + 1], pumps,
                                  self.infile + '.inp')

                self.simulation(self.infile + '.inp')

                # read the rpt info (current-step flooding) to produce the next action
                total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
                    self.infile + '.rpt')
                # Before deciding pump on/off, read the current (final-step) forebay water level; pumps stay off when the level is too low
                self.pool_d = get_output.depth(self.infile + '.out',
                                               self.pool_list,
                                               self.date_t[i] - i)

                # Reward: currently only overflow (flooding) pollution control is considered

                for pool in self.pool_list:
                    reward_sum += (self.GA_flooding_step[i] - flooding) / (
                        0.0001 + total_in)  # the GA flooding data serve as the per-step baseline
                    drs.append((self.GA_flooding_step[i] - flooding) /
                               (0.0001 + total_in))

            episode_number += 1
            # One rainfall episode is finished: update the agent
            # (the episode ends when the simulation time runs out, analogous to the pole falling in the original CartPole code).
            # Record the rewards for this rainfall event.
            epx = np.vstack(xs)
            epy = np.vstack(ys)
            epr = np.vstack(drs)
            xs, ys, drs = [], [], []
            discounted_epr = self.discount_reward(epr)
            discounted_epr -= np.mean(discounted_epr)
            discounted_epr /= np.std(discounted_epr)

            tGrad = self.sess.run(self.newGrads,
                                  feed_dict={
                                      self.observations: epx,
                                      self.input_y: epy,
                                      self.advantages: discounted_epr
                                  })
            for ix, grad in enumerate(tGrad):
                gradBuffer[ix] += grad

            # once a full batch of episodes has accumulated, update the agent
            if episode_number % self.batch_size == 0:
                #print("train")
                self.sess.run(self.updateGrads,
                              feed_dict={
                                  self.W1Grad: gradBuffer[0],
                                  self.W2Grad: gradBuffer[1]
                              })
                # NOTE: reward_sum holds only the last episode's total, not a batch sum
                print('Average reward for %d:%f.' %
                      (episode_number, reward_sum / self.batch_size))
                #reward_sum=0
                for ix, grad in enumerate(gradBuffer):
                    gradBuffer[ix] = grad * 0

                #if abs(old_reward-reward_sum/self.batch_size)/abs(old_reward)<=1e-15:
                #print("Task soveld in", episode_number)
                #break
                #old_reward=reward_sum/self.batch_size

            #observation=env.reset()

            s2 = datetime.datetime.now()
            print(s2 - s1)
        print("training done")
        saver = tf.train.Saver()
        sp = saver.save(self.sess, "./save/model.ckpt")
        print("model saved:", sp)
        return drs
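
self.discount_reward is not shown in these examples. A minimal sketch of the standard backward-discounted return it presumably computes, with gamma as an assumed discount factor, followed by the same mean/std normalization train() applies:

import numpy as np

def discount_reward(r, gamma=0.99):
    # discounted cumulative returns, accumulated backwards over the episode;
    # r is a column vector like np.vstack(drs) above
    r = np.asarray(r, dtype=np.float64).ravel()
    discounted = np.zeros_like(r)
    running = 0.0
    for t in reversed(range(len(r))):
        running = running * gamma + r[t]
        discounted[t] = running
    return discounted.reshape(-1, 1)  # back to a column vector

epr = np.array([[0.1], [0.0], [-0.2], [0.3]])
d = discount_reward(epr)
d -= np.mean(d)  # normalization as in train()
d /= np.std(d)
print(d.ravel())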
Example #7
    def step(self, a, raindata):
        # advance staf's date one step according to iten
        # apply the action
        # run the simulation and store the results

        self.iten+=1
        action=[]
        pumps=[]
        
        #print('a=',a)
        for pool in self.pool_list:
            # check the water level and decide pump on/off accordingly
            flage = 0
            if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
                flage = 0
            elif self.pool_d[pool] < self.limit_level[pool][0]:
                flage = -1
            else:
                flage = 1

            # pump start/stop policy
            if flage == 0:
                if a < self.action_space[1] + 0.1:
                    action.append(0)
                    action.append(0)
                elif a < self.action_space[1] + 0.6:
                    action.append(0)
                    action.append(1)
                else:
                    action.append(1)
                    action.append(1)
            elif flage == -1:
                action.append(0)
                action.append(0)
            else:
                action.append(1)
                action.append(1)
                
                
            for item in self.pump_list[pool]:
                pumps.append(item)
                
            # the reward is only available after setting the pumps and simulating
            self.action_seq.append(action)
            #print(self.action_seq)

        set_pump.set_pump(self.action_seq,self.date_time[1:self.iten],pumps,self.orftem+'.inp')
        
        
        tem_etime=self.date_time[self.iten]
        set_datetime.set_date(self.sdate,self.edate,self.stime,tem_etime,self.orftem+'.inp')
        #change_rain.copy_result(infile+'.inp',startfile+'.inp')
        
        # restore the cached SWMM inp files
        change_rain.copy_result(self.staf+'.inp',self.orftem+'.inp')
        change_rain.copy_result(self.orftem+'.inp',self.orf_rain+'.inp')
        
        #step forward
        self.simulation(self.staf+'.inp')

        # read the state values from the out and rpt files
        # when iten reaches the final time step, the simulation stops
        total_in,flooding,store,outflow,upflow,downflow=get_rpt.get_rpt(self.staf+'.rpt')
        # Before deciding pump on/off, read the current (final-step) forebay water level; pumps stay off when the level is too low
        self.pool_d=get_output.depth(self.staf+'.out',self.pool_list,self.date_t[self.iten]-self.iten)
        rain_sum=sum(raindata[self.date_t[self.iten]:self.date_t[self.iten+1]])/max(raindata)
        
        for pool in self.pool_list:
            # NOTE: only the last pool's state is returned
            state=np.array([outflow/(0.001+total_in),flooding/(0.001+total_in),store/(0.001+total_in),self.pool_d[pool],rain_sum])

        # self.simulation(self.staf+'.inp')  # removed: duplicate of the simulation call above
        
        # Reward: currently only overflow (flooding) pollution control is considered
        '''
        #reward1
        reward_sum=0
        for pool in self.pool_list:
            if flooding>total_in*0.1:
                reward_sum+=-1.0
            else:
                reward_sum+=1.0
        '''
        
        #try different reward
#        change_rain.copy_result('compare_tem_HC.inp',self.orf_rain+'.inp')
#        set_datetime.set_date(self.sdate,self.edate,self.stime,tem_etime,'compare_tem_HC.inp')
#         # baseline comparison used by reward2
#        self.simulation('compare_tem_HC.inp')
#        _,flooding_compare,_,_,_,_=get_rpt.get_rpt('compare_tem_HC.rpt')
        
        '''
        #reward2
        reward_sum=0
        for pool in self.pool_list:
            if flooding_compare!=0.0:
                reward_sum+=(flooding_compare-flooding)/flooding_compare
            else:
                reward_sum+=-flooding/(0.0001+total_in)
        '''
                
        # compare against the flooding computed by the GA
        reward_sum=0
        for pool in self.pool_list:
            reward_sum+=(self.GA_flooding_step[self.iten]-flooding)/(0.0001+total_in)  # the GA flooding data serve as the per-step baseline
        
        if self.iten==self.T-2:
            done=True
        else:
            done=False

        return state,reward_sum,done,{}
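
The per-step reward compares the agent's flooding against the GA baseline for the same step, normalized by total inflow. A minimal sketch of that comparison, with hypothetical flooding values in place of the rpt readings:

def ga_baseline_reward(ga_flooding, agent_flooding, total_in):
    # positive when the agent floods less than the GA baseline this step;
    # the small constant guards against division by zero, as in step()
    return (ga_flooding - agent_flooding) / (0.0001 + total_in)

# hypothetical step: GA baseline floods 8.0 units, the agent 5.0,
# with 100.0 units of total inflow
print(ga_baseline_reward(8.0, 5.0, total_in=100.0))  # ~0.03: agent beats the GA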
Example #8
    def test(self, test_num):
        """train method.
        """
        #tf.reset_default_graph()

        #history = {'episode': [], 'Episode_reward': []}

        dr = []
        for i in range(test_num):
            print('test' + str(i))
            observation = self.env.reset(self.testRainData[i])

            states, actions, rewards = [], [], []
            episode_reward = 0
            j = 0

            while True:
                a = self.choose_action(observation)
                #print(a)
                next_observation, reward, done, _ = self.env.step(
                    a, self.testRainData[i])
                states.append(observation)
                actions.append(a)

                episode_reward += reward
                rewards.append((reward + 8) / 8)

                observation = next_observation

                if (j + 1) % self.batch == 0:
                    states = np.array(states)
                    actions = np.array(actions)
                    rewards = np.array(rewards)
                    d_reward = self.discount_reward(states, rewards,
                                                    next_observation)

                    #self.update(states, actions, d_reward)

                    states, actions, rewards = [], [], []
                    dr.append(d_reward)
                if done:
                    break
                j += 1

            # Compare with the HC baseline
            self.env.copy_result('./test_result/HC/compare_tem_HC' + str(i) +
                                 '.inp', self.env.orf_rain + '.inp')  # restore
            tem_etime = self.env.date_time[self.env.iten]
            set_datetime.set_date(
                self.env.sdate, self.env.edate, self.env.stime, tem_etime,
                './test_result/HC/compare_tem_HC' + str(i) + '.inp')
            self.env.simulation('./test_result/HC/compare_tem_HC' + str(i) +
                                '.inp')

            #history['episode'].append(i)
            #history['Episode_reward'].append(episode_reward)
            #print('Episode: {} | Episode reward: {:.2f}'.format(i, episode_reward))
            sout = './test_result/' + str(i) + '.rpt'
            sin = self.env.staf + '.rpt'
            self.env.copy_result(sout, sin)
            #self.env.copy_result(sout,sin)

        return dr
Example #9
    def step(self, a, rainData):  # simulate the rainfall data step by step
        # advance staf's date one step according to iten
        # apply the action
        # run the simulation and store the results
        action = []
        pumps = []

        self.iten += 1

        #print('a=',a)
        for pool in self.pool_list:
            # check the water level and decide pump on/off accordingly
            flage = 0
            if self.limit_level[pool][0] < self.pool_d[pool] < self.limit_level[pool][2]:
                # level between the minimum pump-on level and the maximum level
                flage = 0
            elif self.pool_d[pool] < self.limit_level[pool][0]:
                # level below the minimum pump-on level
                flage = -1
            else:
                flage = 1

            # pump start/stop policy
            if flage == 0:
                if a < self.action_space[1] + 0.1:
                    # probability below the low threshold: keep both pumps off
                    action.append(0)
                    action.append(0)
                elif a < self.action_space[1] + 0.6:
                    # probability in the middle band: run one pump
                    action.append(0)
                    action.append(1)
                else:
                    # run both pumps
                    action.append(1)
                    action.append(1)
            elif flage == -1:
                action.append(0)
                action.append(0)
            else:
                # flage == 1: above the maximum level, run all pumps
                action.append(1)
                action.append(1)

            for item in self.pump_list[pool]:
                pumps.append(item)

            # the reward is only available after setting the pumps and simulating
            self.action_seq.append(action)
            #print(self.action_seq)

        set_pump.set_pump(self.action_seq, self.date_time[1:self.iten], pumps,
                          self.orftem + '.inp')  # write the pump schedule into self.orftem+'.inp'
        #print(pumps)

        tem_etime = self.date_time[self.iten]
        set_datetime.set_date(self.sdate, self.edate, self.stime, tem_etime,
                              self.orftem +
                              '.inp')  # after setting the pump schedule, update the dates in self.orftem+'.inp'
        #change_rain.copy_result(infile+'.inp',startfile+'.inp')

        change_rain.copy_result(self.staf + '.inp', self.orftem +
                                '.inp')  # copy self.orftem+'.inp' to the self.staf file
        change_rain.copy_result(self.orftem + '.inp', self.orf_rain +
                                '.inp')  # restore: reset self.orftem+'.inp' back to the rain-only version

        self.simulation(self.staf + '.inp')

        # read the state values from the out and rpt files
        # when iten reaches the final time step, the simulation stops
        total_in, flooding, store, outflow, upflow, downflow = get_rpt.get_rpt(
            self.staf + '.rpt')
        # Before deciding pump on/off, read the current (final-step) forebay water level; pumps stay off when the level is too low
        self.pool_d = get_output.depth(self.staf + '.out', self.pool_list,
                                       self.date_t[self.iten] - self.iten)

        rain_sum = sum(
            rainData[self.date_t[self.iten]:self.date_t[self.iten +
                                                        1]]) / max(rainData)

        for pool in self.pool_list:
            # NOTE: only the last pool's state is returned
            state = np.array([
                outflow / (0.001 + total_in), flooding / (0.001 + total_in),
                store / (0.001 + total_in), self.pool_d[pool], rain_sum
            ])

        # Reward: currently only overflow (flooding) pollution control is considered.
        # Here a GA run is performed for each rainfall event; the resulting pump-control
        # sequence is simulated and the per-step flooding data are extracted.

        #change_rain.copy_result('compare_tem_HC.inp',self.orf_rain+'.inp')  # restore
        #set_datetime.set_date(self.sdate,self.edate,self.stime,tem_etime,'compare_tem_HC.inp')
        #self.simulation(self.staf+'.inp')  # duplicate of the call above?

        #try different reward
        #change_rain.copy_result('compare_tem_HC.inp',self.orf_rain+'.inp')
        #set_datetime.set_date(self.sdate,self.edate,self.stime,tem_etime,'compare_tem_HC.inp')
        # baseline comparison used by reward2


#        self.simulation('compare_tem_HC.inp')
#        _,flooding_compare,_,_,_,_=get_rpt.get_rpt('compare_tem_HC.rpt')
        '''
        #reward1
        reward_sum=0
        for pool in self.pool_list:
            if flooding>total_in*0.1:
                reward_sum+=-1.0
            else:
                reward_sum+=1.0
        '''
        '''
        #reward2
        reward_sum=0
        for pool in self.pool_list:
            if flooding_compare!=0.0:
                reward_sum+=(flooding_compare-flooding)/flooding_compare
            else:
                reward_sum+=-flooding/(0.0001+total_in)
        '''
        # compare against the flooding computed by the GA
        reward_sum = 0
        for pool in self.pool_list:
            reward_sum += (self.GA_flooding_step[self.iten] - flooding) / (
                0.0001 + total_in)  # the GA flooding data serve as the per-step baseline

        if self.iten == self.T - 2:
            done = True
        else:
            done = False

        return state, reward_sum, done, {}