Example #1
import logging

import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import KFold

# MLPModel and args are defined elsewhere in the surrounding project and are not shown here.


def train_eval(train, targets):
    features = train.columns
    folds = KFold(n_splits=5, shuffle=True, random_state=1420)
    oof = np.zeros(len(train))

    for fold_, (trn_idx,
                val_idx) in enumerate(folds.split(train.values,
                                                  targets.values)):
        # Only the first fold is trained and evaluated here.
        if fold_ > 0:
            break
        print("Fold {}".format(fold_))
        trn_data, trn_label = train.iloc[trn_idx][
            features].values, targets.iloc[trn_idx].values
        val_data, val_label = train.iloc[val_idx][
            features].values, targets.iloc[val_idx].values
        model = MLPModel(args)
        model.train(trn_data, trn_label, val_data, val_label)

        oof[val_idx] = model.evaluate(val_data, val_label)

        mae = mean_absolute_error(targets.iloc[val_idx], oof[val_idx])
        mse = mean_squared_error(targets.iloc[val_idx], oof[val_idx])

        logging.info('mae: {:<8.5f}'.format(mae))
        logging.info('mse: {:<8.5f}'.format(mse))
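
A minimal sketch of how train_eval might be invoked, assuming train is a pandas DataFrame of numeric features and targets is a pandas Series of the same length (the feature names and data below are illustrative only, not taken from the original project):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
train = pd.DataFrame(rng.normal(size=(1000, 10)),
                     columns=["f{}".format(i) for i in range(10)])
targets = pd.Series(rng.normal(size=1000), name="target")
train_eval(train, targets)
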
Example #2
    def run_submission(self):

        self.debug_print(
            "Use the print function `self.debug_print(...)` for debugging purposes, do NOT use the default `print(...)`"
        )

        # create model
        model = MLPModel(time_step=15, window_size=10, weights='model.hdf5')

        while True:
            """
            NOTE: Only one of (get_next_data_as_string, get_next_data_as_list, get_next_data_as_numpy_array) can be used
            to get the row of data, please refer to the `OVERVIEW OF DATA` section above.

            Uncomment the one that will be used, and comment the others.
            """

            # data = self.get_next_data_as_list()
            data = self.get_next_data_as_numpy_array()
            # data = self.get_next_data_as_string()

            # prediction = self.get_prediction(data)
            prediction = model.predict(data)
            """
            submit_prediction(prediction) MUST be used to submit your prediction for the current row of data
            """
            self.submit_prediction(prediction)
Example #3
def graph_builder(opts,
                  observed=None,
                  ground_truth=None,
                  learning_rate=0.001,
                  mode=util.Modes.TRAIN):

    # Build the neural network
    predictions = MLPModel(opts, mode=mode)(observed)

    # Loss
    loss = opts.loss_scaling * tf.cast(
        tf.losses.absolute_difference(ground_truth,
                                      predictions,
                                      reduction=tf.losses.Reduction.MEAN),
        dtype=getattr(tf, opts.dtypes[0]))

    # Error metric
    rmse_metric = util.exp_rmspe(ground_truth, predictions)

    if mode == util.Modes.TRAIN:
        # Training
        optimizer = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate)
        # Wrap in a CrossReplicaOptimizer if we're replicating across multiple IPUs
        if opts.replication_factor > 1:
            optimizer = cross_replica_optimizer.CrossReplicaOptimizer(
                optimizer)
        # Batch norm variable update dependency
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # Op to calculate every variable gradient
            grads = tf.gradients(loss, tf.trainable_variables())
        grads = list(zip(grads, tf.trainable_variables()))

        # Undo the loss scaling on the gradients
        grads = [(grad / opts.loss_scaling, var) for grad, var in grads]

        # Apply weight_decay directly to gradients
        if opts.weight_decay != 0:
            grads = [(grad + (opts.weight_decay * var),
                      var) if 'l2tag' in var.name and 'kernel' in var.name else
                     (grad, var) for grad, var in grads]

        # clip gradients
        if opts.gradient_clipping:
            grads = [(tf.clip_by_value(grad, -1., 1.), var)
                     for grad, var in grads]

        # Op to update all variables according to their gradient
        apply_grads = optimizer.apply_gradients(grads_and_vars=grads)
        return loss / opts.loss_scaling, rmse_metric, apply_grads
    elif mode == util.Modes.VALID:
        return loss / opts.loss_scaling, rmse_metric, None
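
The loss-scaling pattern above multiplies the loss by opts.loss_scaling before the backward pass and divides every gradient (and the returned loss) by the same factor afterwards, so the parameter updates are mathematically unchanged while small gradient values are kept away from underflow in low-precision arithmetic. A minimal sketch of that identity in PyTorch (illustrative only; the snippet itself uses TensorFlow):

import torch

w = torch.tensor([2.0], requires_grad=True)
x = torch.tensor([3.0])
scale = 128.0                 # stand-in for opts.loss_scaling

loss = (w * x).sum()          # unscaled loss, dL/dw = x = 3
(scale * loss).backward()     # backward pass on the scaled loss
print(w.grad / scale)         # dividing by the scale recovers tensor([3.])
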
Example #4
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)
    graph = tf.Graph()
    with graph.as_default():
        env = gym.make("CartPole-v0")
        model = MLPModel(config=config.CartPoleConfig())
        agent = Agent(model=model)

        logdir = "/tmp/cart_pole"
        if not os.path.isdir(logdir):
            os.makedirs(logdir)

        game = Game(env=env,
                    agent=agent,
                    logdir=logdir,
                    should_render=True,
                    should_load=FLAGS.load)
        game.train(500)
        game.play(50)
Example #5
    os.makedirs('./iter_num/' + args.model_name, exist_ok=True)
    os.makedirs('./logs/' + args.model_name, exist_ok=True)
    os.makedirs('./labels/' + args.model_name, exist_ok=True)
    os.makedirs('./c/', exist_ok=True)

    dataset = Dataset(args)
    change_itr = range(8000, 100000, 4000)
    logger = Logger('./logs/' + args.model_name)
    if args.env_name == 'bimgame':
        model = ConvModel(3, args.num_subgoals, use_rnn=False).to(device)
    else:
        model = MLPModel(46, args.num_subgoals, use_rnn=False).to(device)

    start_itr = 0
    c = []
    if args.one_class:
        if args.pretrained_ckpt is not None:
            model.load_state_dict(
                torch.load('./ckpt/' + args.pretrained_ckpt + '.pkl'))
            start_itr = np.load('./iter_num/' + args.pretrained_ckpt + '.npy')
            c = torch.from_numpy(
                np.load('./c/' + args.pretrained_ckpt +
                        '.npy')).float().to(device)
        # computing initial c for one-class out-of-set estimation
        if len(c) == 0:
            c = get_c(dataset, model, args)
Example #6
def mlp_model_func(ob_space, ac_space):
    return MLPModel(ob_space,
                    ac_space,
                    ob_filter=True,
                    gaussian_fixed_var=True)
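
A possible call site for the factory above, assuming ob_space and ac_space are Gym observation and action spaces (the environment name is an assumption, not taken from the original project):

import gym

env = gym.make("Pendulum-v0")  # hypothetical continuous-control task
policy = mlp_model_func(env.observation_space, env.action_space)
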
Example #7
class Agent:
    """Interacts with and learns from the environment."""
    def __init__(self, state_size: int, action_size: int, seed: int):
        """Initialize an Agent object.
        
        Params
        ======
            state_size (int): dimension of each state
            action_size (int): dimension of each action
            seed (int): random seed
        """
        self.state_size = state_size
        self.action_size = action_size
        self.seed = random.seed(seed)

        # Q-Network
        self.q_local = MLPModel(state_size, action_size, seed).to(device)
        self.q_target = MLPModel(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.q_local.parameters(), lr=LR)

        # Replay memory
        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Initialize time step (for updating every UPDATE_EVERY steps)
        self.t_step = 0

    def step(self, state: np.ndarray, action: int, reward: float,
             next_state: np.ndarray, done: bool):
        """Save data and maybe train the model"""
        # Save experience in replay memory
        self.memory.add(state, action, reward, next_state, done)

        # Learn every UPDATE_EVERY time steps.
        self.t_step = (self.t_step + 1) % UPDATE_EVERY
        if self.t_step == 0:
            # If enough samples are available in memory, get random subset and learn
            if len(self.memory) > BATCH_SIZE:
                experiences = self.memory.sample()
                self.learn(experiences, GAMMA)

    def act(self, state: np.ndarray, eps: float = 0.) -> int:
        """Returns actions for given state as per current policy.
        
        Params
        ======
            state (array_like): current state
            eps (float): epsilon, for epsilon-greedy action selection
        """
        state = torch.from_numpy(state).float().unsqueeze(0).to(device)
        self.q_local.eval()
        with torch.no_grad():
            action_values = self.q_local(state)
        self.q_local.train()

        # Epsilon-greedy action selection
        if random.random() > eps:
            action = np.argmax(action_values.cpu().data.numpy())
        else:
            action = random.choice(np.arange(self.action_size))

        return action

    def learn(self, experiences: Tuple[Tensor, ...], gamma: float):
        """Update value parameters using given batch of experience tuples.

        Params
        ======
            experiences (Tuple[Tensor, ...]): tuple of (s, a, r, s', done) tensors
            gamma (float): discount factor
        """
        states, actions, rewards, next_states, dones = experiences
        # 64 x 8, 64 x 1, 64 x 1,   64 x 8,    64 x 1

        q_current = self.q_local(states)  # B x A = 64 x 4
        v_current = torch.gather(q_current, dim=1, index=actions)

        # Evaluate the frozen target network without tracking gradients.
        with torch.no_grad():
            q_next = self.q_target(next_states)

        q_max, _ = q_next.max(dim=1)
        q_max = q_max.view(-1, 1)

        loss = (rewards + gamma * q_max * (1 - dones) - v_current)**2
        loss = loss.mean()

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # ------------------- update target network ------------------- #
        self.soft_update(TAU)

    def soft_update(self, tau: float):
        """Soft update model parameters.
        θ_target = τ*θ_local + (1 - τ)*θ_target

        Params
        ======
            tau (float): interpolation parameter
        """
        for target_param, local_param in zip(self.q_target.parameters(),
                                             self.q_local.parameters()):
            target_param.data.copy_(tau * local_param.data +
                                    (1.0 - tau) * target_param.data)
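
A minimal training loop using the Agent above, assuming a Gym environment whose observation and action dimensions match state_size and action_size (the environment name, episode count, and epsilon schedule are assumptions, not taken from the original project):

import gym

env = gym.make("LunarLander-v2")          # hypothetical 8-dim state, 4 discrete actions
agent = Agent(state_size=8, action_size=4, seed=0)

eps = 1.0
for episode in range(500):
    state = env.reset()
    done = False
    while not done:
        action = agent.act(state, eps)                        # epsilon-greedy action
        next_state, reward, done, _ = env.step(action)
        agent.step(state, action, reward, next_state, done)   # store experience, maybe learn
        state = next_state
    eps = max(0.01, eps * 0.995)                              # decay exploration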