示例#1
0
    def __init__(self, gamma: float):
        """Create the CartPole-v1 environment and its policy-gradient network.

        Args:
            gamma: Forwarded as ``gamma`` to the parent class initialiser.
        """
        # Load the CartPole environment.
        self.env = gym.make('CartPole-v1')

        # The two selectable actions.
        self.list_action = np.array([0, 1])

        # Policy network: three ("fcN", Linear, 16) + ReLU pairs, a Linear
        # layer sized by the environment's action count, then a Softmax
        # configured with dim=1.
        raw_env = self.env.env
        layers = []
        for idx in (1, 2, 3):
            layers.append(Layer(f"fc{idx}", torch.nn.Linear, 16, None, (), {}))
            layers.append(
                Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
        layers.append(
            Layer("fc4", torch.nn.Linear, raw_env.action_space.n, None, (),
                  {}))
        layers.append(
            Layer("output", torch.nn.Softmax, None, None, (), {"dim": 1}))
        network = TorchNN(raw_env.observation_space.shape[0], *layers)

        policy = PolicyGradientNN(network,
                                  self.list_action,
                                  batch_size=-1,
                                  lr=0.001)
        policy.to_cuda()

        # Hand the finished policy to the parent class.
        super().__init__(policy=policy,
                         list_action=self.list_action,
                         gamma=gamma)

        # Remaining bookkeeping.
        self.done = False
示例#2
0
    def __init__(self, gamma: float, epsilon: float):
        """Create the CartPole-v0 environment and a DQN-backed Q-learner.

        Args:
            gamma: Passed to ``DQN`` as ``gamma``.
            epsilon: Passed to ``QLearn.__init__`` as ``epsilon``.
        """
        # Load the CartPole environment.
        self.env = gym.make('CartPole-v0')

        # The two selectable actions.
        self.list_action = np.array([0, 1])

        # Value function: three ("fcN", Linear, 16) + ReLU pairs followed by
        # a Linear layer sized by the environment's action count.
        raw_env = self.env.env
        layers = []
        for idx in (1, 2, 3):
            layers.append(Layer(f"fc{idx}", torch.nn.Linear, 16, None, (), {}))
            layers.append(
                Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
        layers.append(
            Layer("fc4", torch.nn.Linear, raw_env.action_space.n, None, (),
                  {}))
        network = TorchNN(raw_env.observation_space.shape[0], *layers)

        qfunc = DQN(network,
                    self.list_action,
                    gamma=gamma,
                    batch_size=128,
                    capacity=10000,
                    lr=0.0001)
        qfunc.to_cuda()
        QLearn.__init__(self, qfunc=qfunc, epsilon=epsilon)

        # Remaining bookkeeping.
        self.done = False
示例#3
0
    def __init__(self, gamma: float, epsilon: float):
        """Create the CartPole-v0 environment and a convolutional DQN learner.

        Args:
            gamma: Passed to ``DQN`` as ``gamma``.
            epsilon: Passed to ``QLearn.__init__`` as ``epsilon``.
        """
        # Load the CartPole environment.
        self.env = gym.make('CartPole-v0')

        # The two selectable actions.
        self.list_action = np.array([0, 1])

        # Value function. ``insize`` is both the first TorchNN argument and
        # the length of ``self.history`` created at the end.
        self.insize = 20
        layers = [
            Layer("conv1", torch.nn.Conv2d, 64, None, (),
                  dict(kernel_size=20, stride=10)),
            Layer("relu1", torch.nn.ReLU, None, None, (), {}),
            Layer("conv2", torch.nn.Conv2d, 64, None, (),
                  dict(kernel_size=5, stride=5)),
            Layer("relu2", torch.nn.ReLU, None, None, (), {}),
            Layer("view7", torch.nn.Identity, 64 * 3 * 11, "reshape(x,-1)",
                  (), {}),
        ]
        # Three Linear / BatchNorm1d / ReLU triples, numbered 7 to 9.
        for idx, width in ((7, 512), (8, 256), (9, 64)):
            layers.append(
                Layer(f"fc{idx}", torch.nn.Linear, width, None, (), {}))
            layers.append(
                Layer(f"norm{idx}", torch.nn.BatchNorm1d, 0, None, (), {}))
            layers.append(
                Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
        layers.append(
            Layer("output", torch.nn.Linear, self.env.env.action_space.n,
                  None, (), {}))
        network = TorchNN(self.insize, *layers)

        qfunc = DQN(network,
                    self.list_action,
                    gamma=gamma,
                    batch_size=192,
                    capacity=1000,
                    lr=0.001)
        qfunc.to_cuda()
        QLearn.__init__(self, qfunc=qfunc, epsilon=epsilon)

        # Remaining bookkeeping: one empty history slot per ``insize`` entry.
        self.done = False
        self.history = [None] * self.insize
示例#4
0
    def __init__(self,
                 gamma: float,
                 file_csv: str = "../data/s59h30megacities_utf8.csv",
                 n_capital: int = None):
        """Set up actions, state layout and the policy network for the tour.

        Args:
            gamma: Passed to ``PolicyGradient.__init__`` as ``gamma``.
            file_csv: Passed to ``TSPModelBase.__init__`` as ``file_csv``.
            n_capital: Passed to ``TSPModelBase.__init__`` as ``n_capital``.
        """
        # Initialise through the base class first; ``self.df`` is read below.
        TSPModelBase.__init__(self, file_csv=file_csv, n_capital=n_capital)

        # Actions: the unique "capital_en" values in a random order.
        capitals = self.df["capital_en"].unique()
        self.list_action = np.random.permutation(capitals)

        # State layout. "history" puts the record of visited places into the
        # state.
        self.state_mng = StateManager()
        state_specs = (
            ("country", {"state_type": "onehot",
                         "state_list": self.list_action}),
            ("history", {"state_type": "onehot_binary",
                         "state_list": self.list_action}),
            ("is_last", {"state_type": "binary"}),
        )
        for state_name, state_kwargs in state_specs:
            self.state_mng.set_state(state_name, **state_kwargs)

        # Network definition.
        # NOTE(review): there is no activation between the Linear/BatchNorm
        # stages here — confirm this is intended.
        layers = [
            Layer("fc1", torch.nn.Linear, 64, None, (), {}),
            Layer("norm1", torch.nn.BatchNorm1d, 0, None, (), {}),
            Layer("fc2", torch.nn.Linear, 64, None, (), {}),
            Layer("norm2", torch.nn.BatchNorm1d, 0, None, (), {}),
            Layer("fc3", torch.nn.Linear, len(self.list_action), None, (),
                  {}),
            Layer("soft", torch.nn.Softmax, None, None, (), {"dim": 1}),
        ]
        torch_nn = TorchNN(len(self.state_mng), *layers)
        # Explicit weight setting is currently disabled:
        #torch_nn.set_weight(0.01)
        policy_nn = PolicyGradientNN(torch_nn,
                                     self.list_action,
                                     unit_memory=None,
                                     lr=0.001)
        policy_nn.to_cuda()
        PolicyGradient.__init__(self,
                                gamma=gamma,
                                policy=policy_nn,
                                list_action=self.list_action)
        self.action_pprev = None

        # Parameters used so that the route can be closed into a round trip.
        self.is_back = False
        self.first_country = None
示例#5
0
    def __init__(self,
                 epsilon: float,
                 gamma: float,
                 file_csv: str = "../data/s59h30megacities_utf8.csv",
                 n_capital: int = None):
        """Set up actions, state layout and a fully connected DQN learner.

        Args:
            epsilon: Passed to ``QLearn.__init__`` as ``epsilon``.
            gamma: Passed to ``DQN`` as ``gamma``.
            file_csv: Passed to ``TSPModelBase.__init__`` as ``file_csv``.
            n_capital: Passed to ``TSPModelBase.__init__`` as ``n_capital``.
        """
        # Initialise through the base class first; ``self.df`` is read below.
        TSPModelBase.__init__(self, file_csv=file_csv, n_capital=n_capital)

        # Actions: the unique "capital_en" values in a random order.
        capitals = self.df["capital_en"].unique()
        self.list_action = np.random.permutation(capitals)

        # State layout.
        self.state_mng = StateManager()
        for state_name, state_type in (("country", "onehot"),
                                       ("history", "onehot_binary")):
            self.state_mng.set_state(state_name,
                                     state_type=state_type,
                                     state_list=self.list_action)

        # DQN definition: three Linear / BatchNorm1d / ReLU triples, then a
        # Linear layer sized by the number of actions.
        layers = []
        for idx, width in ((1, 128), (2, 256), (3, 128)):
            layers.append(
                Layer(f"fc{idx}", torch.nn.Linear, width, None, (), {}))
            layers.append(
                Layer(f"norm{idx}", torch.nn.BatchNorm1d, 0, None, (), {}))
            layers.append(
                Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
        layers.append(
            Layer("fc4", torch.nn.Linear, len(self.list_action), None, (),
                  {}))
        torch_nn = TorchNN(len(self.state_mng), *layers)

        qfunc = DQN(torch_nn,
                    self.list_action,
                    gamma=gamma,
                    batch_size=128,
                    capacity=1000)
        qfunc.to_cuda()
        QLearn.__init__(self, qfunc=qfunc, epsilon=epsilon)
示例#6
0
    def __init__(self,
                 epsilon: float,
                 gamma: float,
                 file_csv: str = "../data/s59h30megacities_utf8.csv",
                 n_capital: int = None):
        """Run the parent initialiser, then install an LSTM-based DQN.

        Args:
            epsilon: Forwarded positionally to the parent initialiser.
            gamma: Forwarded positionally to the parent initialiser and passed
                to ``DQN`` as ``gamma``.
            file_csv: Forwarded to the parent initialiser as ``file_csv``.
            n_capital: Forwarded to the parent initialiser as ``n_capital``.
        """
        super(TSPModel5, self).__init__(epsilon,
                                        gamma,
                                        file_csv=file_csv,
                                        n_capital=n_capital)

        # DQN definition: an LSTM stage with three Identity "calc" steps,
        # BatchNorm1d + ReLU, two Linear / BatchNorm1d / ReLU triples and a
        # final Linear layer sized by the number of actions.
        layers = [
            Layer("lstm", torch.nn.LSTM, 128, None, (), {}),
            Layer("calc1", torch.nn.Identity, None, "rnn_outonly", (), {}),
            Layer("calc2", torch.nn.Identity, None, "call_options", (), {}),
            Layer("calc3", torch.nn.Identity, None, "rnn_all", (), {}),
            Layer("norm1", torch.nn.BatchNorm1d, 0, None, (), {}),
            Layer("relu1", torch.nn.ReLU, None, None, (), {}),
        ]
        for idx, width in ((2, 128), (3, 64)):
            layers.append(
                Layer(f"fc{idx}", torch.nn.Linear, width, None, (), {}))
            layers.append(
                Layer(f"norm{idx}", torch.nn.BatchNorm1d, 0, None, (), {}))
            layers.append(
                Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
        layers.append(
            Layer("fc4", torch.nn.Linear, len(self.list_action), None, (),
                  {}))
        torch_nn = TorchNN(len(self.state_mng), *layers)

        # NOTE(review): self.qfunc is assigned here after the parent
        # initialiser has run; if the parent also sets it, that value is
        # replaced — confirm.
        self.qfunc = DQN(torch_nn,
                         self.list_action,
                         gamma=gamma,
                         batch_size=20,
                         capacity=100,
                         unit_memory="episode",
                         lr=0.001)
        self.qfunc.to_cuda()

        # Because the network is an LSTM, keep the states seen during the
        # episode.
        self.state_history = []
示例#7
0
    def __init__(self,
                 epsilon: float,
                 gamma: float,
                 file_csv: str = "../data/s59h30megacities_utf8.csv",
                 n_capital: int = None):
        """Set up actions and a convolutional DQN learner.

        Args:
            epsilon: Passed to ``QLearn.__init__`` as ``epsilon``.
            gamma: Passed to ``DQN`` as ``gamma``.
            file_csv: Passed to ``TSPModelBase.__init__`` as ``file_csv``.
            n_capital: Passed to ``TSPModelBase.__init__`` as ``n_capital``.
        """
        # Initialise through the base class first; ``self.df`` is read below.
        TSPModelBase.__init__(self, file_csv=file_csv, n_capital=n_capital)

        # Actions: the unique "capital_en" values in a random order.
        capitals = self.df["capital_en"].unique()
        self.list_action = np.random.permutation(capitals)

        # DQN definition.
        layers = []
        # Conv2d / ReLU / MaxPool2d triples. Each tuple is
        # (index, Conv2d size argument, conv kernel_size == stride,
        #  pool kernel_size == stride).
        for idx, conv_size, conv_step, pool_step in ((1, 128, 5, 2),
                                                     (2, 128, 3, 2),
                                                     (3, 256, 2, 3)):
            layers.append(
                Layer(f"conv{idx}", torch.nn.Conv2d, conv_size, None, (), {
                    "kernel_size": conv_step,
                    "stride": conv_step,
                }))
            layers.append(
                Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
            layers.append(
                Layer(f"pool{idx}", torch.nn.MaxPool2d, None, None, (), {
                    "kernel_size": pool_step,
                    "stride": pool_step,
                }))
        layers.append(
            Layer("view6", torch.nn.Identity, 256 * 1 * 2, "reshape(x,-1)",
                  (), {}))
        # Linear / BatchNorm1d / ReLU triples, numbered 7 to 9.
        for idx, width in ((7, 256), (8, 128), (9, 64)):
            layers.append(
                Layer(f"fc{idx}", torch.nn.Linear, width, None, (), {}))
            layers.append(
                Layer(f"norm{idx}", torch.nn.BatchNorm1d, 0, None, (), {}))
            layers.append(
                Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
        layers.append(
            Layer("output", torch.nn.Linear, len(self.list_action), None, (),
                  {}))
        torch_nn = TorchNN(3, *layers)

        qfunc = DQN(torch_nn,
                    self.list_action,
                    gamma=gamma,
                    batch_size=128,
                    capacity=1000,
                    lr=0.001)
        qfunc.to_cuda()
        QLearn.__init__(self, qfunc=qfunc, epsilon=epsilon)
示例#8
0
    def __init__(self, gamma: float, epsilon: float):
        """Create the CartPole-v0 environment and a three-branch CNN DQN.

        The value network starts with a "split_numpy_3" step, wraps three
        identically configured CNN sub-networks ("cnn1".."cnn3"), applies a
        "combine" step and ends in fully connected layers. Each sub-network
        is a separate ``TorchNN`` instance built by one local factory, so the
        branch configuration is defined in a single place.

        Args:
            gamma: Passed to ``DQN`` as ``gamma``.
            epsilon: Passed to ``QLearn.__init__`` as ``epsilon``.
        """
        # Load the CartPole environment.
        self.env = gym.make('CartPole-v0')

        # The two selectable actions.
        self.list_action = np.array([0, 1])

        def build_branch_cnn():
            """Return a new TorchNN of five Conv2d/ReLU/MaxPool2d stages."""
            # (index, Conv2d size argument, conv kernel_size, conv stride);
            # every stage is followed by a MaxPool2d with kernel 2, stride 2.
            conv_specs = (
                (1, 128, 3, 1),
                (2, 128, 2, 1),
                (3, 64, 2, 2),
                (4, 64, 2, 2),
                (5, 32, 2, 1),
            )
            stages = []
            for idx, conv_size, kernel, stride in conv_specs:
                stages.append(
                    Layer(f"conv{idx}", torch.nn.Conv2d, conv_size, None, (),
                          {
                              "kernel_size": kernel,
                              "stride": stride,
                          }))
                stages.append(
                    Layer(f"relu{idx}", torch.nn.ReLU, None, None, (), {}))
                stages.append(
                    Layer(f"pool{idx}", torch.nn.MaxPool2d, None, None, (), {
                        "kernel_size": 2,
                        "stride": 2,
                    }))
            return TorchNN(3, *stages)

        # Value function: three independent branches, built in order.
        cnn1 = build_branch_cnn()
        cnn2 = build_branch_cnn()
        cnn3 = build_branch_cnn()
        network = TorchNN(
            None,
            Layer("split", torch.nn.Identity, None, "split_numpy_3", (), {}),
            Layer("cnn1", cnn1, None, None, (), {}),
            Layer("cnn2", cnn2, None, None, (), {}),
            Layer("cnn3", cnn3, None, None, (), {}),
            Layer("view6", torch.nn.Identity, 3 * 32 * 1 * 4, "combine", (),
                  {}),
            Layer("fc6", torch.nn.Linear, 128, None, (), {}),
            Layer("norm6", torch.nn.BatchNorm1d, 0, None, (), {}),
            Layer("relu6", torch.nn.ReLU, None, None, (), {}),
            Layer("fc7", torch.nn.Linear, 64, None, (), {}),
            Layer("norm7", torch.nn.BatchNorm1d, 0, None, (), {}),
            Layer("relu7", torch.nn.ReLU, None, None, (), {}),
            Layer("output", torch.nn.Linear, self.env.env.action_space.n, None,
                  (), {}),
        )
        qfunc = DQN(network,
                    self.list_action,
                    gamma=gamma,
                    batch_size=8,
                    capacity=256,
                    lr=0.0001)
        qfunc.to_cuda()
        QLearn.__init__(self, qfunc=qfunc, epsilon=epsilon)

        # Remaining bookkeeping: five empty history slots.
        self.done = False
        self.history = [None] * 5