Example #1
def etl_people_survey_result(people_survey, pid, project_id, survey_id,
                             enterprise_info, orgid, t):
    # Append the person's survey record to people_survey_info
    if people_survey.begin_answer_time is None or people_survey.finish_time is None:
        people_survey_info = [
            people_survey.id, pid, survey_id, project_id, enterprise_info.id,
            orgid, people_survey.begin_answer_time, people_survey.finish_time,
            people_survey.model_score, people_survey.dimension_score,
            people_survey.substandard_score, people_survey.facet_score,
            people_survey.happy_score, people_survey.happy_ability_score,
            people_survey.happy_efficacy_score, people_survey.praise_score,
            people_survey.uniformity_score, t
        ]
    else:
        people_survey_info = [
            people_survey.id, pid, survey_id, project_id, enterprise_info.id,
            orgid,
            time_format(people_survey.begin_answer_time),
            time_format(people_survey.finish_time), people_survey.model_score,
            people_survey.dimension_score, people_survey.substandard_score,
            people_survey.facet_score, people_survey.happy_score,
            people_survey.happy_ability_score,
            people_survey.happy_efficacy_score, people_survey.praise_score,
            people_survey.uniformity_score, t
        ]

    print('tttt%s' % people_survey.begin_answer_time)

    return people_survey_info
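
This example (and Example #3 below) passes datetime values straight to time_format, so here it acts as a datetime-to-string formatter. A minimal sketch under that assumption; the project's actual time_format may use a different layout:

from datetime import datetime

def time_format(dt):
    # Hypothetical helper: render a datetime as "YYYY-MM-DD HH:MM:SS".
    return dt.strftime('%Y-%m-%d %H:%M:%S')

print(time_format(datetime(2020, 1, 2, 15, 4, 5)))  # -> 2020-01-02 15:04:05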
Example #2
    def update_list(self):
        que = self.w1.update()
        rows = self.cur.execute(que).fetchmany(self.count_rows_spin.value())
        self.list_order.clear()
        self.list_order_2.clear()
        for i in rows:
            dish_time_s = self.cur.execute(f'''select cooktime from dish 
                                                    where id = (select dishid from orderdish
                                                    where id = ({i[0]}))''').fetchone()[0]
            date_time_s = self.cur.execute(f'''select datetime from orderclient
                                              where id = {i[1]}''').fetchone()[0]
            dish_count = int(i[4])
            date_time = QDateTime.fromString(date_time_s, date_time_format())
            dish_time = QTime.fromString(dish_time_s, time_format())
            dish_time_minutes = dish_time.hour() * 60 + dish_time.minute() * dish_count
            dish_time = QTime(dish_time_minutes // 60, dish_time_minutes % 60)

            secs_passed = date_time.secsTo(QDateTime.currentDateTime())
            hms = [dish_time.hour() - secs_passed // 3600,
                   dish_time.minute() - secs_passed // 60 % 60,
                   59 - secs_passed % 60]
            time_last = QTime(*hms)
            if time_last.isValid():
                order = [time_last.toString(time_format() + ':ss'), *i[2:]]
            else:
                order = ['Done', *i[2:]]
            item = QListWidgetItem(' - '.join(map(str, order)))
            if not time_last.isValid():
                item.setBackground(QColor(255, 220, 220))
                self.list_order_2.addItem(item)
            else:
                self.list_order.addItem(item)
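
In this example (and in Examples #8 and #14) time_format() is called with no arguments and its result is handed to QTime.fromString, so it evidently returns a Qt format string rather than formatted text. A minimal sketch under that assumption; the concrete format strings are guesses:

def time_format():
    # Hypothetical helper: the Qt format string used for QTime parsing/display.
    return 'hh:mm'

def date_time_format():
    # Hypothetical helper: the Qt format string used for QDateTime parsing.
    return 'dd.MM.yyyy hh:mm'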
Example #3
def etl_project_info(assess, enterpriseinfo, t):
    # Append the project information to assess_info
    assess_info = [[
        assess.id, assess.name, enterpriseinfo.id,
        time_format(assess.begin_time),
        time_format(assess.end_time), assess.assess_type, assess.user_count, t
    ]]
    return assess_info
Example #4
def get_string_repr_duration_and_place(
        current_datetime,  # DateTime
        place,  # Str
        spent_time  # Float
):
    # DateTime after spent
    end_datetime = get_datetime_after_spent(current_datetime, spent_time)
    # Get time format of start and end time
    start_time_format = time_format(current_datetime.hour,
                                    current_datetime.minute)
    end_time_format = time_format(end_datetime.hour, end_datetime.minute)
    return f'{ start_time_format } - { end_time_format } - { place }'
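
Here time_format(hour, minute) takes two integers and returns a printable time string. A minimal sketch under that assumption, producing a zero-padded "HH:MM":

def time_format(hour, minute):
    # Hypothetical helper: zero-padded "HH:MM" string for the given hour and minute.
    return f'{hour:02d}:{minute:02d}'

print(time_format(9, 5))  # -> 09:05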
Example #5
 def train_agent(self):
     average_reward = 0
     scores_window = deque(maxlen=100)
     t0 = time.time()
     for i_episode in range(1, self.episodes):
         episode_reward = 0
         state = self.env.reset()
         t = 0
         while True:
             t += 1
             action = self.act(state)
             next_state, reward, done, _ = self.env.step(action)
             episode_reward += reward
             if i_episode > 10:
                 self.learn()
             self.memory.add(state, reward, action, next_state, done)
             state = next_state
             if done:
                 scores_window.append(episode_reward)
                 break
         if i_episode % self.eval == 0:
             self.eval_policy()
         ave_reward = np.mean(scores_window)
         print("Episode {} Steps {} Reward {} Average reward {:.2f} Time {}".format(
             i_episode, t, episode_reward, ave_reward,
             time_format(time.time() - t0)))
         self.writer.add_scalar('Aver_reward', ave_reward, self.steps)
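
In this and the following training-loop examples, time_format receives an elapsed time in seconds (time.time() - t0) and returns a human-readable duration. A minimal sketch under that assumption, formatting the value as "HH:MM:SS":

def time_format(seconds):
    # Hypothetical helper: format an elapsed number of seconds as "HH:MM:SS".
    hours, remainder = divmod(int(seconds), 3600)
    minutes, secs = divmod(remainder, 60)
    return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, secs)

print(time_format(3723.4))  # -> 01:02:03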
Example #6
def train(env, config):
    """

    """
    t0 = time.time()
    save_models_path = str(config["locexp"])
    memory = ReplayBuffer((8, ), (1, ), config["buffer_size"], config["seed"],
                          config["device"])
    memory.load_memory(config["buffer_path"])
    agent = Agent(state_size=8, action_size=4, config=config)
    if config["idx"] < memory.idx:
        memory.idx = config["idx"]
    print("memory idx ", memory.idx)
    for t in range(config["predicter_time_steps"]):
        text = "Train Predicter {}  \ {}  time {}  \r".format(
            t, config["predicter_time_steps"], time_format(time.time() - t0))
        print(text, end='')
        agent.learn(memory)
        if t % int(config["eval"]) == 0:
            print(text)
            agent.save(save_models_path + "/models/{}-".format(t))
            #agent.test_predicter(memory)
            agent.test_q_value(memory)
            agent.eval_policy()
            agent.eval_policy(True, 1)
Example #7
 def _ult_charge_append(self, players, time):
     ult_charge_row = [utils.time_format(time)]
     for player in players:
         chara = player.chara
         ult_charge = player.ult_charge
         ult_charge_row += [chara, ult_charge]
     self.sheet.append(ult_charge_row)
     return
Example #8
 def add(self, res):
     # Add WorkMinute
     dishcookminte = self.cur.execute(
         f'select cooktime from dish where id = '
         f'{res[2]}').fetchone()[0]
     dishcookminte = QTime.fromString(dishcookminte, time_format())
     dishcookminte = dishcookminte.hour() * 60 + dishcookminte.minute(
     ) * int(res[3])
     self.cur.execute(f'''update cook set
                     workminute = workminute + {dishcookminte}
                     where id = {res[1]}''')
     return super().add(res)
Example #9
    def output_result(self, test, dev):
        resultf = self.__logdir + '/result.pkl'
        elapsed_time = time.time() - self.__start_time
        d = get_arg('all')
        d['test'] = test
        d['dev'] = dev
        d['composition'] = d['composition'].name
        d['time'] = utils.time_format(elapsed_time)
        pickle.dump(d, open(resultf, 'wb'))

        script = 'import pickle\n' +\
        "for k, v in pickle.load(open('result.pkl', 'rb')).items():\n" +\
        "    print('{}: {}'.format(k, v))"
        print(script, file=open(self.__logdir + '/print_result.py', 'w'))
Example #10
    def _format(data):
        """
        Format the output record.
        :param data: {"field name": value, ...}
        :return: [value, ...]
        """
        data['time'] = utils.time_format(data['time'])
        format_spec = Config.format
        result = [''] * (len(Config.title) + 1)
        for k, v in format_spec.items():
            if k in ['object hero', 'subject hero'] and k in data:
                data[k] = utils.chara_capitalize(data[k])
            if k in data:
                result[v-1] = data[k]

        for i, s in enumerate(result):
            if s == 'empty' or s == 'Empty':
                result[i] = ''
        return result
Example #11
    def main_loop(self, datasets, epochs=100, batchsize=100, reporter=[], validation=False):
        '''
        Main learning loop
        '''
        # Start training
        print('\n\n--- START TRAINING ---\n\n')
        num_data = len(datasets)
        self.on_train_begin()
        for e in range(epochs):
            perm = np.random.permutation(num_data)
            start_time = time.time()
            for b in range(0, num_data, batchsize):
                bsize = min(batchsize, num_data - b)
                indx = perm[b:b+bsize]

                # Print current status
                ratio = 100.0 * (b + bsize) / num_data
                print(chr(27) + "[2K", end='')
                print('\rEpoch #%d | %d / %d (%6.2f %%) ' % \
                      (e + 1, b + bsize, num_data, ratio), end='')

                # Get batch and train on it
                x_batch = self.make_batch(datasets, indx)
                losses = self.train_on_batch(x_batch)
                self.report_logs(reporter, losses)

                # Compute ETA
                elapsed_time = time.time() - start_time
                eta = elapsed_time / (b + bsize) * (num_data - (b + bsize))
                print('| ETA: %s ' % utils.time_format(eta), end='')

                sys.stdout.flush()
            train_logs = self.train_on_batch(datasets.get_training())
            val_logs = []
            if validation:
                val_logs = self.train_on_batch(datasets.get_validation())
                print('\tvalidation ', end='')
                self.report_logs(reporter, val_logs)
                print('\n')
            _logs = (train_logs, val_logs)
            self.on_epoch_end(e, _logs)
Example #12
            plt.imshow(np.squeeze(final_lstm_pred_vals[j, 0, :, :, :]))
            fig.add_subplot(rows, columns, j + (2 * columns) + 1)
            plt.imshow(np.squeeze(final_ground_truth_vals[j + 1, 0, :, :, :]))
            fig.add_subplot(rows, columns, j + 1 + (3 * columns))
            plt.imshow(np.squeeze(final_pred_vals[j + columns + 1,
                                                  0, :, :, :]))
            fig.add_subplot(rows, columns, j + columns + 1 + (3 * columns))
            plt.imshow(
                np.squeeze(final_lstm_pred_vals[j + columns, 0, :, :, :]))
            fig.add_subplot(rows, columns,
                            j + (2 * columns) + 1 + (3 * columns))
            plt.imshow(
                np.squeeze(final_ground_truth_vals[j + columns + 1,
                                                   0, :, :, :]))
    plt.savefig('images/final_pred.png')

    if not ARGS.test:
        print(
            saver.save(
                sess,
                '/home/louis/MSc_Thesis/saved_models/saved_model_weights.ckpt')
        )

print("Setup:", utils.time_format(setup_time - start_time))
print("Training: {} over {} epochs of size {}; {} per batch".format(
    utils.time_format(time.time() - setup_time), ARGS.num_epochs, batch_num,
    utils.time_format(
        (time.time() - setup_time) / (ARGS.num_epochs * batch_num))))

print("Total:", utils.time_format(time.time() - start_time))
Example #13
def train_agent(env, args, config):
    """
    Train the agent in env, logging scores to TensorBoard and saving periodic checkpoints.

    Args:
        env: environment to train in.
        args: parsed command-line arguments (device, replay and schedule settings).
        config: dict with seed and logging/checkpoint locations.
    """

    # create a CNN that converts the [1, 3, 84, 84] observation into a [1, 200] feature vector
    now = datetime.now()
    dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
    torch.manual_seed(config["seed"])
    np.random.seed(config["seed"])
    if torch.cuda.is_available() and not args.disable_cuda:
        args.device = torch.device('cuda')
        torch.cuda.manual_seed(np.random.randint(1, 10000))
        torch.backends.cudnn.enabled = args.enable_cudnn
    pathname = dt_string + "_seed" + str(config["seed"])
    print("save tensorboard {}".format(config["locexp"]))
    tensorboard_name = str(config["locexp"]) + '/runs/' + pathname
    agent = Agent(args, env)

    #agent.load(str(args.locexp), "1/checkpoint-52038.pth")
    memory = ReplayMemory(args, args.memory_capacity)
    #memory.load_memory("memory_pacman")
    #memory =  ReplayBuffer((3, config["size"], config["size"]), (1,), config["expert_buffer_size"], int(config["image_pad"]), config["device"])
    priority_weight_increase = (1 - args.priority_weight) / (args.T_max -
                                                             args.learn_start)
    writer = SummaryWriter(tensorboard_name)
    results_dir = os.path.join(str(config["locexp"]), args.id)
    mkdir("", results_dir)
    scores_window = deque(maxlen=100)
    steps_window = deque(maxlen=100)
    scores = []
    t0 = time.time()
    # Training loop
    agent.train()
    T, done = 0, True
    print("result dir ", results_dir)
    agent.save(results_dir, 'checkpoint-{}.pth'.format(T))
    #eval_policy(env, agent, writer, T, config)
    episode = -1
    steps = 0
    score = 0
    print("save policy ", args.checkpoint_interval)
    # eval_policy(env, agent, writer, 0, config)
    for T in range(1, args.T_max + 1):
        # print("\r {} of {}".format(T, args.T_max), end='')
        if done:
            episode += 1
            # Checkpoint the network
            if episode % 100 == 0:
                memory.save_memory("memory_pacman")
                print("Eval policy")
                #eval_policy(env, agent, writer, T, config)
                agent.save(results_dir, 'checkpoint-{}.pth'.format(T))
            scores_window.append(score)  # save most recent score
            scores.append(score)  # save most recent score
            steps_window.append(steps)
            ave_steps = np.mean(steps_window)
            print(
                '\rTime steps {}  episode {} score {} Average Score: {:.2f} steps {} ave steps {:.2f} time: {}'
                .format(T, episode, score, np.mean(scores_window), steps,
                        ave_steps, time_format(time.time() - t0)),
                end="")
            writer.add_scalar('Episode_reward ', score, T)
            average_reward = np.mean(scores_window)
            writer.add_scalar('Average_reward ', average_reward, T)
            state, done = env.reset("mediumClassic"), False
            steps = 0
            score = 0
        if T % args.replay_frequency == 0:
            agent.reset_noise()  # Draw a new set of noisy weights
        action = agent.act(
            state)  # Choose an action greedily (with noisy weights)
        next_state, reward, done, _ = env.step(action)  # Step
        score += reward
        steps += 1
        if steps == 125:
            done = True
        memory.append(state, action, reward,
                      done)  # Append transition to memory

        # Train and test
        if T >= args.learn_start:
            memory.priority_weight = min(
                memory.priority_weight + priority_weight_increase,
                1)  # Anneal importance sampling weight β to 1

            if T % args.replay_frequency == 0:
                agent.learn(
                    memory
                )  # Train with n-step distributional double-Q learning

            # Update target network
            if T % args.target_update == 0:
                agent.update_target_net()

        state = next_state
Example #14
 def __init__(self, name, correct=None, enabled=True, default=None):
     super().__init__(name, QTimeEdit, correct, enabled, default)
     self.format = time_format()
Example #15
 def train_agent(self):
     env = gym.make("LunarLanderContinuous-v2")
     average_reward = 0
     scores_window = deque(maxlen=100)
     s = 0
     t0 = time.time()
     for i_episode in range(self.episodes):
         episode_reward = 0
         state = env.reset()
         for t in range(self.max_timesteps):
             s += 1
             action = self.act(state)
             next_state, reward, done, _ = env.step(action)
             episode_reward += reward
             if i_episode > 3:
                 self.learn()
             self.memory.add(state, reward, action, next_state, done)
             state = next_state
             if done:
                 scores_window.append(episode_reward)
                 break
         if i_episode % self.eval == 0:
             self.eval_policy()
         ave_reward = np.mean(scores_window)
         print("Episode {} Steps {} Reward {} Average reward {:.2f} Time {}".format(
             i_episode, t, episode_reward, ave_reward, time_format(time.time() - t0)))
         self.writer.add_scalar('Aver_reward', ave_reward, self.steps)
Example #16
os.chmod(param_file_path, 0o777)

if ARGS.tfdbg:
    chosen_session = tf_debug.LocalCLIDebugWrapperSession(tf.Session())
else:
    chosen_session = tf.Session()
with chosen_session as sess:
    if ARGS.restore is None:
        sess.run(tf.global_variables_initializer())
    else:
        #restore_path = saver.restore(sess, os.path.join(checkpoint_dir, ARGS.restore))
        restore_path = saver.restore(sess, ARGS.restore)
        print("Restoring from checkpoint at", restore_path)
    train_start_time = time.time()
    print("Graph-build time: ",
          utils.time_format(train_start_time - start_time))

    dataset_length = len(input_d_vecs)
    num_batches = dataset_length // ARGS.batch_size

    dataset_length_validation = len(input_d_vecs_validation)
    num_batches_validation = dataset_length_validation // ARGS.batch_size

    best_em_score = 0.0
    best_avg_f1 = 0.0
    global_batch_num = 0
    new_avg_f1 = 0
    new_em_score = 0

    best_em_score_outer_prod = 0.0
    best_avg_f1_outer_prod = 0.0
Example #17
def main():
    data = utils.read_file('./data/new.csv')
    print("There are %d samples in raw data set" % len(data))
    print("Raw input data set information")
    utils.data_info(data)
    utils.missing_info(data, "raw_missing")

    # handle format and garbled text issues in raw data
    data = utils.time_format(data)
    data = utils.garbled_drawing(data)
    data = utils.garbled_floor(data)
    data = utils.garbled_living(data)
    data = utils.garbled_bath(data)
    data = utils.garbled_construct(data)
    data = utils.strange_building(data)

    # drop columns that provide no help
    data = utils.drop_columns(data, ['url', 'id', 'price', 'DOM'])
    print(
        "Raw data set information after transferring format and drop columns")
    utils.data_info(data)
    utils.missing_info(data, "raw_missing_2")

    # the raw data contains more than 300,000 data points; use about 10% of it in this project
    x_raw, y_raw, data, y = utils.data_splitting(data, data['totalPrice'], 0.1)
    data.to_csv('small.csv')
    print("smaller data set", np.shape(data), np.shape(y))
    print("y_info")
    print(y.describe())
    plt.hist(y)
    plt.xlabel("totalPrice")
    plt.ylabel("counts")
    plt.savefig('y.png')
    plt.close()

    # split D into D'' and D_Test
    x_doubleprime, y_doubleprime, x_test, y_test = utils.data_splitting(
        data, data['totalPrice'], 0.2)
    print("D'' shape", np.shape(x_doubleprime), np.shape(y_doubleprime))
    print("D_test shape", np.shape(x_test), np.shape(y_test))

    # split D'' into D' and D_pt
    x_prime, y_prime, x_pt, y_pt = utils.data_splitting(
        x_doubleprime, x_doubleprime['totalPrice'], 0.1)
    print("D_pt shape", np.shape(x_pt), np.shape(y_pt))
    print("D_prime shape", np.shape(x_prime), np.shape(y_prime))

    # Use pre-training set to look at data and conduct initial test
    print("Pre-training data set preprocessing:")
    utils.pre_training(x_pt)

    # D' data set preprocessing
    print("D' preprocessing:")
    x_train, y_train, x_val, y_val, cols_keep, imputation = utils.preprocessing(
        x_prime)
    print("D' after preprocessing:")
    print("D_train shape after preprocessing", np.shape(x_train),
          np.shape(y_train))
    print("D_val shape after preprocessing", np.shape(x_val), np.shape(y_val))

    # Linear Regression
    lin_reg = model.linear_regression(x_train, y_train, x_val, y_val,
                                      cols_keep)

    # Ridge Regression
    rid_reg = model.ridge_regression(x_train, y_train, x_val, y_val, cols_keep)

    # Lasso Regression
    las_reg = model.lasso_regression(x_train, y_train, x_val, y_val, cols_keep)

    # Random Forest
    rf = model.random_forest(x_train, y_train, x_val, y_val, cols_keep)

    # model tuning
    x, y, rid_reg_cv = model.ridge_cv(x_train, y_train, x_val, y_val,
                                      cols_keep)
    las_reg_cv = model.lasso_cv(x_train, y_train, x_val, y_val, cols_keep)
    rf_cv = model.random_forest_cv(x_train, y_train, x_val, y_val, cols_keep)

    # Final_result
    x_cv = pd.concat([x, y], axis=1)
    with_missing_cv = utils.missing_info(x_cv, "cv_missing")
    with_missing_test = utils.missing_info(x_test, "test_missing")
    continuous = ["Lng", "Lat", "square", "ladderRatio", "communityAverage"]
    discrete = [
        "Cid", "tradeTime", "followers", "livingRoom", "drawingRoom",
        "kitchen", "bathRoom", "floor", "buildingType", "constructionTime",
        "renovationCondition", "buildingStructure", "elevator",
        "fiveYearsProperty", "subway", "district"
    ]
    x_cv, y_cv, x_test, y_test = utils.method_2_prime(x_cv, with_missing_cv,
                                                      x_test,
                                                      with_missing_test,
                                                      continuous, discrete)
    x_test = pd.DataFrame(x_test, index=x_test.index, columns=cols_keep)

    predict_lin_reg = lin_reg.predict(x_test)
    print("Linear Regression:")
    print("RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_lin_reg)))
    predict_rid_reg = rid_reg.predict(x_test)
    predict_rid_reg_cv = rid_reg_cv.predict(x_test)
    print("Ridge Regression:")
    print("RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_rid_reg)))
    print("After cross validation, RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_rid_reg_cv)))
    predict_las_reg = las_reg.predict(x_test)
    predict_las_reg_cv = las_reg_cv.predict(x_test)
    print("Lasso Regression:")
    print("RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_las_reg)))
    print("After cross validation, RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_las_reg_cv)))
    predict_rf = rf.predict(x_test)
    predict_rf_cv = rf_cv.predict(x_test)
    print("Random Forest:")
    print("RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_rf)))
    print("After cross validation, RMSE on test data = %.5f" %
          np.sqrt(mean_squared_error(y_test, predict_rf_cv)))

    # plot
    plt.scatter(x_test['square'],
                y_test,
                c='blue',
                marker='o',
                label='real test')
    plt.scatter(x_test['square'],
                predict_rf_cv,
                c='red',
                marker='x',
                label='predict test')
    plt.xlabel('square')
    plt.legend(loc='upper right')
    plt.savefig("feature_square.png")
    plt.close()

    plt.scatter(x_test['livingRoom'],
                y_test,
                c='blue',
                marker='o',
                label='real test')
    plt.scatter(x_test['livingRoom'],
                predict_rf_cv,
                c='red',
                marker='x',
                label='predict test')
    plt.xlabel('livingRoom')
    plt.legend(loc='upper right')
    plt.savefig("feature_living.png")
    plt.close()
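
Unlike the other examples, this one applies utils.time_format to a whole pandas DataFrame as a data-cleaning step. A minimal sketch of what such a column-normalizing helper might look like; the column name 'tradeTime' is taken from the feature list above and is an assumption, and the real implementation may transform different columns:

import pandas as pd

def time_format(data):
    # Hypothetical helper: normalize the 'tradeTime' column to proper datetimes
    # so downstream splitting and modeling see a consistent time format.
    data = data.copy()
    data['tradeTime'] = pd.to_datetime(data['tradeTime'], errors='coerce')
    return data

df = pd.DataFrame({'tradeTime': ['2016-08-09', '2016-08-10']})
print(time_format(df)['tradeTime'].dtype)  # -> datetime64[ns]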
Example #18
def train_agent(env, config):
    """
    Train a DQN agent from image observations with an epsilon-greedy policy.

    Args:
        env: environment to train in.
        config: dict with seed, buffer, epsilon schedule and evaluation settings.
    """

    # create a CNN that converts the [1, 3, 84, 84] observation into a [1, 200] feature vector
    now = datetime.now()
    dt_string = now.strftime("%d_%m_%Y_%H:%M:%S")
    torch.manual_seed(config["seed"])
    np.random.seed(config["seed"])
    #pathname = str(args.locexp) + "/" + str(args.env_name) + '_agent_' + str(args.policy)
    #pathname += "_batch_size_" + str(args.batch_size) + "_lr_act_" + str(args.lr_actor)
    #pathname += "_lr_critc_" + str(args.lr_critic) + "_lr_decoder_"
    pathname = dt_string
    tensorboard_name = str(config["locexp"]) + '/runs/' + pathname
    agent = DQNAgent(state_size=200,
                     action_size=env.action_space.n,
                     config=config)
    writer = SummaryWriter(tensorboard_name)
    print("action_size {}".format(env.action_space.n))
    # eval_policy(env, agent, writer, 0, config)
    memory = ReplayBuffer((3, config["size"], config["size"]),
                          (1, ), config["expert_buffer_size"],
                          int(config["image_pad"]), config["device"])
    if config["create_buffer"]:
        create_buffer(env, memory, config)
        memory.load_memory("/export/leiningc/" + config["buffer_path"])
    else:
        print("load Buffer")
        memory.load_memory("/export/leiningc/" + config["buffer_path"])
        print("Buffer size {}".format(memory.idx))
    eps = config["eps_start"]
    eps_end = config["eps_end"]
    eps_decay = config["eps_decay"]
    scores_window = deque(maxlen=100)
    scores = []
    t0 = time.time()
    for i_episode in range(config["train_episodes"]):
        obs = env.reset()
        score = 0
        for t in range(config["max_t"]):
            action = agent.act(obs, eps)
            # action = env.action_space.sample()
            next_obs, reward, done_no_max, _ = env.step(action)
            done = done_no_max
            if t + 1 == config["max_t"]:
                print("t ", t)
                done = 0
            memory.add(obs, action, reward, next_obs, done, done_no_max)
            agent.step(memory, writer)
            obs = next_obs
            eps = max(eps_end, eps_decay * eps)  # decrease epsilon
            score += reward
            if done:
                break
        scores_window.append(score)  # save most recent score
        scores.append(score)  # save most recent score
        ave_score = np.mean(scores_window)
        writer.add_scalar("ave_score", ave_score, i_episode)
        writer.add_scalar("episode_score", score, i_episode)
        print(
            '\rEpisode {} score {} \tAverage Score: {:.2f}  eps: {:.2f} time: {}'
            .format(i_episode, score, np.mean(scores_window), eps,
                    time_format(time.time() - t0)),
            end="")
        if i_episode % config["eval"] == 0:
            eval_policy(env, agent, writer, i_episode, config)
            agent.save(
                str(config["locexp"]) + "/models/eval-{}/".format(i_episode))
            print(
                'Episode {} Average Score: {:.2f}  eps: {:.2f} time: {}'.
                format(i_episode, np.mean(scores_window), eps,
                       time_format(time.time() - t0)))
Example #19
with tf.Session() as sess:
    # add additional options to trace the session execution
    options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()
    sess.run(tf.global_variables_initializer(), options=options, run_metadata=run_metadata)

    # Create the Timeline object, and write it to a json file
    fetched_timeline = timeline.Timeline(run_metadata.step_stats)
    chrome_trace = fetched_timeline.generate_chrome_trace_format()
    with open('timeline_01.json', 'w') as f:
        f.write(chrome_trace)


    #sess.run(tf.global_variables_initializer())
    train_start_time = time.time()
    print("Time elapsed from beginning until right before starting train is: ", utils.time_format(train_start_time - start_time))
    for i in range(ARGS.num_epochs):
        sess.run(iter_.initializer)
        # run batches until the dataset iterator is exhausted, then log the last summary
        while True:
            try:
                summary, _, loss_val = sess.run([merged, train_step, mean_loss])
            except tf.errors.OutOfRangeError:
                writer.add_summary(summary, i)
                break

    train_end_time = time.time()

    print("Total training time (without data reading): ", utils.time_format(train_end_time - train_start_time))

    #save_path = saver.save(sess, "/tmp/model.ckpt")
Example #20
File: train.py  Project: felixwzh/La-DTL
        saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=20)

        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        sess.run(tf.global_variables_initializer())

        # tensor board
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter(log_path + '/', sess.graph)

        logging.info('Model built, %s used\n' %
                     time_format(time.time() - tic0))

        test_feed_dicts = []
        for i in xrange((len(dataset_u.test_x) + args.batch_size_u - 1) //
                        args.batch_size_u):
            x, y, l, msl = data.pack(
                dataset_u.test_x[i * args.batch_size_u:(i + 1) *
                                 args.batch_size_u],
                dataset_u.test_y[i * args.batch_size_u:(i + 1) *
                                 args.batch_size_u])
            test_feed_dicts.append({
                x_u_: x,
                seq_len_u_: l,
                y_u_: y,
                msl_u_: msl,
                keep_prob_: 1
Example #21
    def train(self):
        
        scores_window = deque(maxlen=100)
        step_window = deque(maxlen=100)
        eps = 1
        t0 = time.time()
        total_timesteps = 0
        i_episode = 0
        while total_timesteps < self.total_frames:
            state = self.env.reset()
            env_score = 0
            steps = 0
            while True:
                total_timesteps += 1
                steps += 1
                action = self.act(state, eps)
                next_state, reward, done, _ = self.env.step(action)
                eps = max(self.eps_end, self.eps_decay*eps) # decrease epsilon
                if self.start_timesteps < total_timesteps:
                    self.step()
                env_score += reward
                self.replay_buffer.add(state, action, reward, next_state, done, done)
                state = next_state
                
                if done:
                    i_episode += 1
                    break
            
            scores_window.append(env_score)       # save most recent score
            step_window.append(steps)       # save most recent step count
            mean_reward = np.mean(scores_window)
            mean_steps = np.mean(step_window)
            self.writer.add_scalar('env_reward', env_score, total_timesteps)
            self.writer.add_scalar('mean_reward', mean_reward, total_timesteps)
            self.writer.add_scalar('mean_steps', mean_steps, total_timesteps)
            self.writer.add_scalar('steps', steps, total_timesteps)
            print(' Totalsteps {} Episode {} Step {} Reward {} Average Score: {:.2f} epsilon {:.2f} time {}'.format(
                total_timesteps, i_episode, steps, env_score, mean_reward, eps,
                time_format(time.time() - t0)))
            if i_episode % self.eval == 0:
                print('\rEpisode {}\tAverage Score: {:.2f} Time: {}'.format(
                    i_episode, mean_reward, time_format(time.time() - t0)))