Пример #1
0
def maxQvalue(screen, isFile, model0, model1, last0, last1, option):
    """Return the largest Q(s', a') over all actions a' for the new state s'.

    Args:
        screen: the new state s', passed to the model as a one-element batch.
        isFile: truthy when a trained model is available; otherwise the
            function returns 0 without evaluating anything.
        model0: model evaluated when ``option % 3`` is 0 or 1.
        model1: model evaluated when ``option % 3`` is 2.
        last0: index into the output of ``model0`` that holds the Q values
            (presumably the index of its last layer -- confirm with caller).
        last1: same as ``last0`` but for ``model1``.
        option: learning option; only ``option % 3`` is used here.

    Returns:
        0 when ``isFile`` is falsy, else the maximum of the selected
        model's Q values for ``screen``.
    """
    # Without a trained model there is nothing to evaluate.
    if not isFile:
        return 0

    # Select which model (and which output index) this option uses.
    mode = option % 3
    if mode == 2:
        model, last = model1, last1
    elif mode in (0, 1):
        model, last = model0, last0

    Qvalues = Qlearning_deep_GPU.modelOutput(model, [screen])

    # The best achievable value from the new state.
    return max(Qvalues[last][0])
Пример #2
0
def getQvalues(screen, isFile, model0, model1, last0, last1, func0, actions,
               option):
    """Return Q(s, a) for every action a in the state ``screen``.

    Args:
        screen: the state to evaluate (already flattened, per the original
            author's note); passed to the model as a one-element batch.
        isFile: truthy when a trained model is available.
        model0: model evaluated when ``option % 3`` is 0 or 1.
        model1: model evaluated when ``option % 3`` is 2.
        last0: index into the output of ``model0`` that holds the Q values.
        last1: index into the output of ``model1`` that holds the Q values.
        func0: default Q value used for every action when no model exists.
        actions: number of actions.
        option: learning option; only ``option % 3`` is used here.

    Returns:
        ``[func0] * actions`` when ``isFile`` is falsy, otherwise the Q-value
        row produced by the selected model. The row is also printed when
        ``option % 3`` is 0 or 2.
    """
    # No trained model yet: every action gets the same default value.
    if not isFile:
        return [func0] * actions

    mode = option % 3
    header = 'Qval of decision whose next state is above:\n'

    if mode == 2:
        row = Qlearning_deep_GPU.modelOutput(model1, [screen])[last1][0]
        print(header + str(row) + '\n\n')
        return row

    if mode == 0 or mode == 1:
        row = Qlearning_deep_GPU.modelOutput(model0, [screen])[last0][0]
        # Only option 0 logs the values; option 1 returns them silently.
        if mode == 0:
            print(header + str(row) + '\n\n')
        return row
                        else: # 학습 영역의 크기가 6 미만
                            NN = [tf.keras.layers.Flatten(input_shape=(size*size,)),
                                keras.layers.Dense(32, activation='relu'),
                                keras.layers.Dropout(drop),
                                keras.layers.Dense(32, activation='relu'),
                                keras.layers.Dropout(drop),
                                keras.layers.Dense(32, activation='relu'),
                                keras.layers.Dropout(drop),
                                keras.layers.Dense(32, activation='relu'),
                                keras.layers.Dense(2, activation='sigmoid')]

                        # 5. 옵티마이저
                        op = tf.keras.optimizers.Adam(0.001)
                        
                        print('training...')
                        Qlearning_deep_GPU.deepQlearning(0, NN, op, lc, inputs, outputs, None, None, 'WPCNdeep', [epoc, None], None, None, False, False, True, deviceName)

                        # 6. 테스트 결과 확인하고 정답과 비교하기
                        newModel = Qlearning_deep_GPU.deepLearningModel('WPCNdeep_0', False)
                        sumTestThroughput = 0.0 # test throughput의 합계
                        sumCorrectMaxThroughput = 0.0 # 정답의 throughput의 합계 (training data를 생성할 때와 같은 방법으로 최대 throughput 확인)

                        optiInfo = open('optiInfo_' + str(problemNo) + '.txt', 'r')
                        optiInformation = optiInfo.readlines()
                        optiInfo.close()

                        print('testing...')
                        
                        for i in range(numTest):
                            testScreen = originalScreen[numTrain + i] # 테스트할 스크린
                            testOutput = Qlearning_deep_GPU.modelOutput(newModel, [testScreen]) # 테스트 결과
Пример #4
0
                charValue=50,
                randomProb_=randomProb,
                feasible_=feasible,
                getNextState_=None)

            ## 1-4. lastScreen, lastDecision, lastPoint 갱신 ##
            for j in range(size):
                for k in range(size):
                    lastScreen[j][k] = screen[j][k]
            lastDecision = decision
            lastPoint = [point[0], point[1]]

        ### 2. 게임 종료 후 딥러닝 실행 ###
        if len(states) < 1000:  # 1000개 미만이면 모두 학습
            Qlearning_deep_GPU.deepQlearning(
                option, NN, op, 'mean_squared_error', states, outputs, Qdiffs,
                25, 'basket', [20, 25], [8, 'sigmoid', 'sigmoid'],
                [6, 'sigmoid', 'sigmoid'], False, False, True, deviceName)
        else:  # 1000개 이상이면 마지막 1000개만 학습
            Qlearning_deep_GPU.deepQlearning(
                option, NN, op, 'mean_squared_error',
                states[len(states) - 1000:], outputs[len(states) - 1000:],
                Qdiffs[len(states) - 1000:], 25, 'basket', [20, 25],
                [8, 'sigmoid', 'sigmoid'], [6, 'sigmoid', 'sigmoid'], False,
                False, True, deviceName)
        isFile = True

        if option % 3 == 0 or option % 3 == 1:
            model0 = Qlearning_deep_GPU.deepLearningModel('basket_0', False)
        if option % 3 == 1 or option % 3 == 2:
            model1 = Qlearning_deep_GPU.deepLearningModel('basket_1', False)
Пример #5
0
                               keras.layers.Conv2D(32, kernel_size=(3, 3), input_shape=(size, size, 1), activation='elu'),
                               keras.layers.MaxPooling2D(pool_size=2),
                               keras.layers.Dropout(drop),
                               keras.layers.Conv2D(32, (3, 3), activation='elu'),
                               keras.layers.Flatten(),
                               keras.layers.Dropout(drop),
                               keras.layers.Dense(40, activation='elu'),
                               keras.layers.Dense(2, activation='sigmoid')]

                        # 5. 옵티마이저
                        op0 = tf.keras.optimizers.Adam(0.001)
                        op1 = tf.keras.optimizers.Adam(0.001)
                        op2 = tf.keras.optimizers.Adam(0.001)
                        
                        print('training...')
                        Qlearning_deep_GPU.deepQlearning(0, NN2, op0, lc, inputs, outputs, None, None, 'WPCNdeepNN2', [epoc, None], None, None, False, False, True, deviceName)
                        Qlearning_deep_GPU.deepQlearning(0, NN1, op1, lc, inputs, outputs, None, None, 'WPCNdeepNN1', [epoc, None], None, None, False, False, True, deviceName)
                        Qlearning_deep_GPU.deepQlearning(0, NN0, op2, lc, inputs, outputs, None, None, 'WPCNdeepNN0', [epoc, None], None, None, False, False, True, deviceName)

                        # 6. 테스트 결과 확인하고 정답과 비교하기
                        newModel0 = Qlearning_deep_GPU.deepLearningModel('WPCNdeepNN0_0', False)
                        newModel1 = Qlearning_deep_GPU.deepLearningModel('WPCNdeepNN1_0', False)
                        newModel2 = Qlearning_deep_GPU.deepLearningModel('WPCNdeepNN2_0', False)
                        
                        sumTestThroughput = 0.0 # test throughput의 합계
                        sumCorrectMaxThroughput = 0.0 # 정답의 throughput의 합계 (training data를 생성할 때와 같은 방법으로 최대 throughput 확인)

                        optiInfo = open('optiInfo_' + str(problemNo) + '.txt', 'r')
                        optiInformation = optiInfo.readlines()
                        optiInfo.close()
    ]
    op = tf.keras.optimizers.Adam(0.001)
    Qdiff = [1, 2, 3, 4, 5, 6, 7]

    # apply each option
    for i in range(6):
        states = [[2, 1], [3, 2], [5, 3], [8, 5], [13, 8], [21, 13], [34, 21]]
        outputs = [[0.2, 0.5], [0.4, 0.6], [0.5, 0.7], [0.7, 0.75],
                   [0.8, 0.77], [0.85, 0.8], [0.9, 0.85]]

        # learning
        print('\n ' + ('#===' * 16) + ' <<<< option:' + str(i) +
              ' LEARNING >>>> ' + ('===#' * 16) + '\n')
        Qlearning_deep_GPU.deepQlearning(i, NN, op, 'mean_squared_error',
                                         states, outputs, Qdiff, 25,
                                         'test' + str(i), [20, 25],
                                         [8, 'sigmoid', 'sigmoid'],
                                         [6, 'sigmoid', 'sigmoid'], True, True,
                                         False, deviceName)

        # test
        print('\n ' + ('#===' * 16) + ' <<<< option:' + str(i) +
              ' TEST >>>> ' + ('===#' * 16) + '\n')

        if i % 3 == 0 or i % 3 == 1:
            print('\n << test output (첫번째 Neural Network) >>\n')
            newModel = Qlearning_deep_GPU.deepLearningModel(
                'test' + str(i) + '_0', False)
            testOutput = Qlearning_deep_GPU.modelOutput(
                newModel, [[4, 2.5], [6, 3.5], [7, 4.5]])
            print('\n[[4, 2.5], [6, 3.5], [7, 4.5]]에 대한 학습 결과:\n')
Пример #7
0
def decideAction(option, screen, screenCopy, point, additional, isFile, model0,
                 model1, last0, last1, gameNo, charSize, actions, exceedValue,
                 charValue, randomProb_, feasible_, getNextState_):
    """Choose the action to execute in the current state.

    With probability ``randomProb_(gameNo, option)``, or whenever no trained
    model is available, a random feasible action is returned. Otherwise every
    action is scored, infeasible actions are masked out, and the first action
    with the highest score is returned.

    Scoring depends on ``option % 3``:
        0 -- ``model0`` evaluated on ``screenCopy`` (basic DQN).
        1 -- for each action, the next state is built with ``getNextState_``
             and scored as ``Q0(s', argmax_a Q1(s', a))`` (Double DQN).
        2 -- ``model1`` evaluated on ``screenCopy`` (dueling network).

    Args:
        option: learning option; only ``option % 3`` selects the branch, the
            full value is forwarded to ``randomProb_``.
        screen: full square screen (list of rows).
        screenCopy: flattened square region around the character; its length
            determines the learning-region side length.
        point: character position, forwarded to ``feasible_``.
        additional: extra data forwarded to ``feasible_``.
        isFile: truthy when trained models are available.
        model0, model1: the two models (see branches above).
        last0, last1: index into the output of ``model0`` / ``model1`` that
            holds the Q values.
        gameNo: game counter, forwarded to ``randomProb_``.
        charSize: side length of the character in cells.
        actions: number of actions.
        exceedValue: value used for cells outside the screen when extracting
            the region around the character.
        charValue: cell value that marks the character on the screen.
        randomProb_: callable ``(gameNo, option) -> probability``.
        feasible_: callable ``(screen, action, additional, point) -> bool``.
        getNextState_: callable ``(screen, action) -> next screen or None``;
            only used when ``option % 3 == 1``.

    Returns:
        The index of the chosen action. ``None`` is returned when no index in
        ``range(actions)`` equals the maximum score (e.g. NaN scores).
    """
    # Score given to actions that cannot be taken.
    INFEASIBLE = -1000000

    screenSize = len(screen)  # side length of the screen
    # side length of the learning region around the character
    learningSize = int(math.sqrt(len(screenCopy)))

    # how many cells around the character are observed in each direction
    around = int((learningSize - charSize) / 2)
    # how many cells from the character's centre still belong to the character
    charRadius = int((charSize - 1) / 2)

    def randomFeasibleAction():
        # Rejection sampling: draw until a feasible action comes up.
        # NOTE(review): loops forever if no action is feasible.
        while True:
            candidate = random.randint(0, actions - 1)
            if feasible_(screen, candidate, additional, point):
                return candidate

    # Exploration: act randomly with the configured probability.
    if random.random() < randomProb_(gameNo, option):
        print('랜덤하게 행동 실행')
        return randomFeasibleAction()

    # No trained model yet: a random feasible action is all we can do.
    if not isFile:
        return randomFeasibleAction()

    mode = option % 3

    if mode == 1:
        # Double DQN: score each action by looking one step ahead.
        scores = [INFEASIBLE] * actions
        reach = charRadius + around

        for action in range(actions):
            newScreen = getNextState_(screen, action)
            # No next state (the action ran off the screen): keep the
            # INFEASIBLE score. `is None` also stays well defined when the
            # screen is an array-like object.
            if newScreen is None:
                continue

            # Locate the character in the new screen. The first matching cell
            # in row-major order is its top-left corner, so shift by
            # charRadius to get the centre. [-1, -1] when not found.
            newPoint = next(
                ([i + charRadius, j + charRadius]
                 for i in range(screenSize)
                 for j in range(screenSize)
                 if newScreen[i][j] == charValue),
                [-1, -1])

            # Extract the region around the character, padding cells that
            # fall outside the screen with exceedValue.
            newScreenCopy = [
                exceedValue if exceed(newScreen, i, j) else newScreen[i][j]
                for i in range(newPoint[0] - reach, newPoint[0] + reach + 1)
                for j in range(newPoint[1] - reach, newPoint[1] + reach + 1)]

            # Both models are evaluated on the next state s_(t+1).
            Qvalues0 = Qlearning_deep_GPU.modelOutput(model0, [newScreenCopy])
            Qvalues1 = Qlearning_deep_GPU.modelOutput(model1, [newScreenCopy])

            # argmax_a Q1(s_(t+1), a). model1's output is indexed with last1,
            # consistent with every other use of model1 (the original used
            # last0 here, which is only correct when both indices coincide).
            selector = Qvalues1[last1][0]
            top = max(selector)
            argmaxA = next(
                (i for i in range(len(selector)) if selector[i] == top), -1)

            # Q0(s_(t+1), argmax_a Q1(s_(t+1), a))
            scores[action] = Qvalues0[last0][0][argmaxA]

        print('final Qvalues (below is result of action made by them):',
              scores)

    elif mode == 2:
        # Dueling network: model1 on the current state.
        scores = Qlearning_deep_GPU.modelOutput(model1, [screenCopy])[last1][0]

    else:
        # Basic DQN: model0 on the current state.
        scores = Qlearning_deep_GPU.modelOutput(model0, [screenCopy])[last0][0]

    # Mask out actions that cannot be executed.
    for i in range(actions):
        if not feasible_(screen, i, additional, point):
            scores[i] = INFEASIBLE

    # Return the first action with the highest score.
    best = max(scores)
    for i in range(actions):
        if scores[i] == best:
            return i