def get_config(args=None, is_chief=True, task_index=0, chief_worker_hostname="", n_workers=1):
    logger.set_logger_dir(args.train_log_path
                          + datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
                          + '_' + str(task_index))

    # strategy to split model parameters between multiple parameter servers,
    # balancing by the byte size of each variable
    ps_strategy = tf.contrib.training.GreedyLoadBalancingStrategy(
        len(cluster['ps']), tf.contrib.training.byte_size_load_fn)
    device_function = tf.train.replica_device_setter(
        worker_device='/job:worker/task:{}/cpu:0'.format(task_index),
        cluster=cluster_spec,
        ps_strategy=ps_strategy)

    M = Model(device_function)

    name_base = str(uuid.uuid1()).replace('-', '')[:16]
    PIPE_DIR = os.environ.get('TENSORPACK_PIPEDIR', '.').rstrip('/')
    namec2s = 'ipc://{}/sim-c2s-{}'.format(PIPE_DIR, name_base)
    names2c = 'ipc://{}/sim-s2c-{}'.format(PIPE_DIR, name_base)
    procs = [
        MySimulatorWorker(k, namec2s, names2c)
        for k in range(args.simulator_procs)
    ]
    ensure_proc_terminate(procs)
    start_proc_mask_signal(procs)

    neptune_client = neptune_mp_server.Client(
        server_host=chief_worker_hostname, server_port=args.port)

    master = MySimulatorMaster(task_index,
                               neptune_client,
                               namec2s,
                               names2c,
                               M,
                               dummy=args.dummy,
                               predictor_threads=args.nr_predict_towers,
                               predict_batch_size=args.predict_batch_size,
                               do_train=args.do_train)

    # dataflow fed to the trainer: batches taken from the simulator master's queue
    dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

    with tf.device(device_function):
        with tf.variable_scope(tf.get_variable_scope(), reuse=None):
            lr = tf.Variable(args.learning_rate,
                             trainable=False,
                             name='learning_rate')
            tf.summary.scalar('learning_rate', lr)

    intra_op_par = args.intra_op_par
    inter_op_par = args.inter_op_par

    session_config = get_default_sess_config(0.5)

    print("{} {}".format(intra_op_par, type(intra_op_par)))
    if intra_op_par is not None:
        session_config.intra_op_parallelism_threads = intra_op_par
    if inter_op_par is not None:
        session_config.inter_op_parallelism_threads = inter_op_par

    session_config.log_device_placement = False
    extra_arg = {
        'dummy_predictor': args.dummy_predictor,
        'intra_op_par': intra_op_par,
        'inter_op_par': inter_op_par,
        'max_steps': args.max_steps,
        'device_count': {'CPU': args.cpu_device_count},
        'threads_to_trace': args.threads_to_trace,
        'dummy': args.dummy,
        'cpu': args.cpu,
        'queue_size': args.queue_size,
        # 'worker_host': "grpc://localhost:{}".format(cluster['worker'][my_task_index].split(':')[1]),
        'worker_host': server.target,
        'is_chief': is_chief,
        'device_function': device_function,
        'n_workers': n_workers,
        'use_sync_opt': args.use_sync_opt,
        'port': args.port,
        'batch_size': BATCH_SIZE,
        'debug_charts': args.debug_charts,
        'adam_debug': args.adam_debug,
        'task_index': task_index,
        'lr': lr,
        'schedule_hyper': args.schedule_hyper,
        'experiment_dir': args.experiment_dir
    }

    print("\n\n worker host: {} \n\n".format(extra_arg['worker_host']))

    with tf.device(device_function):
        if args.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(lr,
                                               epsilon=args.epsilon,
                                               beta1=args.beta1,
                                               beta2=args.beta2)
            if args.adam_debug:
                optimizer = MyAdamOptimizer(lr,
                                            epsilon=args.epsilon,
                                            beta1=args.beta1,
                                            beta2=args.beta2)
        elif args.optimizer == 'gd':
            optimizer = tf.train.GradientDescentOptimizer(lr)
        elif args.optimizer == 'adagrad':
            optimizer = tf.train.AdagradOptimizer(lr)
        elif args.optimizer == 'adadelta':
            optimizer = tf.train.AdadeltaOptimizer(lr, epsilon=1e-3)
        elif args.optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(lr, momentum=0.9)
        elif args.optimizer == 'rms':
            optimizer = tf.train.RMSPropOptimizer(lr)

        # wrap the optimizer in SyncReplicasOptimizer when synchronous training is requested
        if args.use_sync_opt == 1:
            if not args.adam_debug:
                optimizer = tf.train.SyncReplicasOptimizer(
                    optimizer,
                    replicas_to_aggregate=args.num_grad,
                    total_num_replicas=n_workers)
            else:
                optimizer = MySyncReplicasOptimizer(
                    optimizer,
                    replicas_to_aggregate=args.num_grad,
                    total_num_replicas=n_workers)
            extra_arg['hooks'] = optimizer.make_session_run_hook(is_chief)

    callbacks = [
        StatPrinter(),
        master,
        DebugLogCallback(neptune_client,
                         worker_id=task_index,
                         nr_send=args.send_debug_every,
                         debug_charts=args.debug_charts,
                         adam_debug=args.adam_debug,
                         schedule_hyper=args.schedule_hyper)
    ]

    if args.debug_charts:
        callbacks.append(
            HeartPulseCallback('heart_pulse_{}.log'.format(
                os.environ['SLURMD_NODENAME'])))

    if args.early_stopping is not None:
        args.early_stopping = float(args.early_stopping)
        if task_index == 1 and not args.eval_node:
            # only one worker does evaluation
            callbacks.append(
                PeriodicCallback(
                    Evaluator(EVAL_EPISODE, ['state'], ['logits'],
                              neptune_client,
                              worker_id=task_index,
                              solved_score=args.early_stopping), 2))
    elif task_index == 1 and not args.eval_node:
        # only one worker does evaluation
        callbacks.append(
            PeriodicCallback(
                Evaluator(EVAL_EPISODE, ['state'], ['logits'],
                          neptune_client,
                          worker_id=task_index), 2))

    if args.save_every != 0:
        callbacks.append(
            PeriodicPerStepCallback(
                ModelSaver(var_collections=M.vars_for_save,
                           models_dir=args.models_dir), args.save_every))

    if args.schedule_hyper and task_index == 2:
        callbacks.append(
            HyperParameterScheduler('learning_rate', [(20, 0.0005), (60, 0.0001)]))
        callbacks.append(
            HyperParameterScheduler('entropy_beta', [(40, 0.005), (80, 0.001)]))

    return TrainConfig(dataset=dataflow,
                       optimizer=optimizer,
                       callbacks=Callbacks(callbacks),
                       extra_threads_procs=[master],
                       session_config=session_config,
                       model=M,
                       step_per_epoch=STEP_PER_EPOCH,
                       max_epoch=args.max_epoch,
                       extra_arg=extra_arg)
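
# A minimal sketch (not part of the original source) of how the module-level names
# used by get_config() above (`cluster`, `cluster_spec` and `server`) are typically
# constructed with the distributed TensorFlow 1.x API. The hostnames and ports are
# placeholder assumptions, not values taken from this repository.
def _example_cluster_setup(task_index=0):
    cluster = {'ps': ['ps0.example.com:2222'],
               'worker': ['worker0.example.com:2222', 'worker1.example.com:2222']}
    cluster_spec = tf.train.ClusterSpec(cluster)
    # server.target is what get_config() exposes as extra_arg['worker_host']
    server = tf.train.Server(cluster_spec, job_name='worker', task_index=task_index)
    return cluster, cluster_spec, server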
    input_queues[worker_id].put(dist)

    for t in threads:
        t.join()

    # collect one score per evaluation worker and aggregate
    scores = []
    for _ in range(worker_num):
        scores.append(score_queue.get())
    return np.mean(scores), np.max(scores)


eval_model.state = tf.placeholder(tf.float32, shape=(None,) + IMAGE_SHAPE3)
eval_model.policy = build_graph(eval_model.state)

if __name__ == '__main__':
    neptune_client = neptune_mp_server.Client(server_host=args.server_host,
                                              server_port=args.server_port)

    i = 0
    dir_path = os.path.join(args.models_dir, 'iter_{}')
    # print("WAITING FOR DIR {}".format(dir_path.format(i)))
    # poll for checkpoint directories written by the training workers and evaluate each model
    while True:
        if os.path.isdir(dir_path.format(i)):
            dir_path_i = dir_path.format(i)
            time.sleep(0.1)
            for f in os.listdir(dir_path_i):
                if f[:5] == 'model':
                    model_path = os.path.join(dir_path_i, f)
                    break

            model_time = float(model_path.split('-')[1])
            model_steps = float(model_path.split('-')[2])

            model_mean, model_max = eval_model(model_path, EVAL_EPISODE)
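
# A small illustrative helper (not in the original script), assuming the checkpoint
# naming convention implied by the parsing above: files saved as
# 'model-<wall_time>-<global_step>' inside '<models_dir>/iter_<i>' directories.
def _example_parse_checkpoint_name(model_path):
    # e.g. model_path = '/experiments/models/iter_0/model-1520000000.25-40000'
    parts = os.path.basename(model_path).split('-')
    return float(parts[1]), float(parts[2])  # (save wall-time, global step)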