def get_data_path(dataset_name, local_root, local_repo='', path=''):
    """
    Dataset specification, see: get_data_path,
    https://tensorport.com/documentation/api/#get_data_path

    If local_root starts with gs:// we assume a Google Cloud Storage bucket
    and return local_root / local_repo / path.

    :param str dataset_name: TensorPort dataset repository name,
        e.g. user_name/repo_name
    :param str local_root: root directory for the dataset,
        e.g. /home/username/datasets, gs://my-project/my_dir
    :param str local_repo: repo name inside the root data path,
        e.g. my_repo_data/
    :param str path: path inside the repository (optional), e.g. train
    :return str: the real path of the dataset
    """
    if local_root.startswith('gs://'):
        return os.path.join(local_root, local_repo, path)
    return tensorport.get_data_path(dataset_name=dataset_name,
                                    local_root=local_root,
                                    local_repo=local_repo,
                                    path=path)
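# Hedged usage sketch (illustrative only; the bucket and repo names below are
# placeholders, not from the original source). With a gs:// root the helper
# above joins the pieces itself; otherwise it defers to
# tensorport.get_data_path(), which resolves to the mounted /data/... location
# on TensorPort and to local_root/local_repo/path when running locally.
#
#   >>> get_data_path(dataset_name="user/repo",
#   ...               local_root="gs://my-project/my_dir",
#   ...               local_repo="my_repo_data",
#   ...               path="train")
#   'gs://my-project/my_dir/my_repo_data/train'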
"task_index", task_index, "Worker task index, should be >= 0. task_index=0 is " "the chief worker task the performs the variable " "initialization") flags.DEFINE_string("ps_hosts", ps_hosts, "Comma-separated list of hostname:port pairs") flags.DEFINE_string("worker_hosts", worker_hosts, "Comma-separated list of hostname:port pairs") # Training related flags # Training related flags flags.DEFINE_string( "data_dir", get_data_path( dataset_name="malo/mnist", #all mounted repo local_root=ROOT_PATH_TO_LOCAL_DATA, local_repo="mnist", path=''), "Path to store logs and checkpoints. It is recommended" "to use get_logs_path() to define your logs directory." "so that you can switch from local to tensorport without" "changing your code." "If you set your logs directory manually make sure" "to use /logs/ when running on TensorPort cloud.") flags.DEFINE_string( "log_dir", get_logs_path(root=PATH_TO_LOCAL_LOGS), "Path to dataset. It is recommended to use get_data_path()" "to define your data directory.so that you can switch " "from local to tensorport without changing your code." "If you set the data directory manually makue sure to use" "/data/ as root path when running on TensorPort cloud.")
    y = to_categorical(y, num_classes=3)
    return y


def encode_texts(text, embedding_vocab, embedding_size):
    emb_func = lambda sent: sentence_to_emb(sent, embedding_vocab, embedding_size)
    emb_texts = np.array([emb_func(sent) for sent in text])
    return emb_texts


if __name__ == "__main__":
    LOCAL_DATA_PATH = os.path.expanduser('~/Neural Networks/')
    train_data_path = get_data_path(dataset_name="yevhentysh/train-reloaded",
                                    local_root=LOCAL_DATA_PATH,
                                    local_repo='data',
                                    path="train")
    train_df = create_df(train_data_path)
    train_df.text = train_df.text.apply(clean_text)
    train_df.text = train_df.text.apply(lambda row: lemmatize_text(row))

    vocab_size = 5000
    vocab = get_vocabulary(train_df, length=vocab_size)
    emb_vocab = embedding_mapping(vocab)
    l = len(emb_vocab)

    X_train, X_test, y_train, y_test = split_data(train_df, 0.8)

    embed_size = 20
    X_train = encode_texts(X_train, emb_vocab, embed_size)
    X_test = encode_texts(X_test, emb_vocab, embed_size)
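    # Hedged example (not from the original source): the same preprocessing
    # chain applied to one unseen sentence at inference time, reusing only the
    # helpers already used above (clean_text, lemmatize_text, encode_texts).
    # The sample text is a placeholder.
    sample = lemmatize_text(clean_text("an example sentence to classify"))
    sample_encoded = encode_texts([sample], emb_vocab, embed_size)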
# print('flags init done')

# start of tport snippet 1
# Path to your data locally. This will enable you to run the model both
# locally and on TensorPort without changes.
PATH_TO_LOCAL_LOGS = os.path.expanduser(
    '~/Desktop/compBios/satellite_image_rec/projects/sat_image_proj/logs/')
ROOT_PATH_TO_LOCAL_DATA = os.path.expanduser('~/data/Harrison/sat_img_dataset/')
# end of tport snippet 1

# Define the path from the root data directory to your data.
flags.DEFINE_string(
    "train_data_dir",
    get_data_path(
        dataset_name="Harrison/satimages-1",
        local_root=ROOT_PATH_TO_LOCAL_DATA,
        local_repo="",
        path=''),
    "Path to dataset. It is recommended to use get_data_path() "
    "to define your data directory so that you can switch "
    "from local to TensorPort without changing your code. "
    "If you set the data directory manually, make sure to use "
    "/data/ as the root path when running on TensorPort cloud.")
flags.DEFINE_string(
    "logs_dir",
    get_logs_path(root=PATH_TO_LOCAL_LOGS),
    "Path to store logs and checkpoints. It is recommended "
    "to use get_logs_path() to define your logs directory "
    "so that you can switch from local to TensorPort without "
    "changing your code. "
    "If you set your logs directory manually, make sure "
    "to use /logs/ when running on TensorPort cloud.")
def main():
    """Main wrapper"""

    # tport snippet 1 - get environment variables
    try:
        job_name = os.environ['JOB_NAME']
        task_index = int(os.environ['TASK_INDEX'])
        ps_hosts = os.environ['PS_HOSTS']
        worker_hosts = os.environ['WORKER_HOSTS']
    except KeyError:
        job_name = None
        task_index = 0
        ps_hosts = None
        worker_hosts = None

    # Path to your data locally. This will enable you to run the model both
    # locally and on TensorPort without changes.
    PATH_TO_LOCAL_LOGS = os.path.expanduser('~/Documents/tensorport-self-driving-demo/logs/')
    ROOT_PATH_TO_LOCAL_DATA = os.path.expanduser('~/Documents/comma/')
    # end of tport snippet 1

    # Flags
    flags = tf.app.flags
    FLAGS = flags.FLAGS

    # tport snippet 2: flags.
    # Define the path from the root data directory to your data.
    # We use glob to match any .h5 datasets in Documents/comma locally,
    # or in data/ on TensorPort.
    flags.DEFINE_string(
        "train_data_dir",
        get_data_path(
            dataset_name="tensorport/*",   # all mounted repos
            local_root=ROOT_PATH_TO_LOCAL_DATA,
            local_repo="comma-final",      # all repos (we use glob downstream, see read_data.py)
            path='camera/training/*.h5'),  # all .h5 files
        """Path to training dataset. It is recommended to use get_data_path()
        to define your data directory. If you set your dataset directory
        manually, make sure to use /data/ as the root path when running on
        TensorPort cloud. On TensorPort, the data will be mounted in
        /data/user/tport_dataset_name, so you can access `path` at
        /data/user/tport_dataset_name/path.
        """)
    flags.DEFINE_string(
        "logs_dir",
        get_logs_path(root=PATH_TO_LOCAL_LOGS),
        "Path to store logs and checkpoints. It is recommended "
        "to use get_logs_path() to define your logs directory. "
        "If you set your logs directory manually, make sure "
        "to use /logs/ when running on TensorPort cloud.")

    # Define worker-specific environment variables. Handled automatically.
    flags.DEFINE_string("job_name", job_name,
                        "job name: worker or ps")
    flags.DEFINE_integer("task_index", task_index,
                         "Worker task index, should be >= 0. task_index=0 is "
                         "the chief worker task that performs the variable "
                         "initialization")
    flags.DEFINE_string("ps_hosts", ps_hosts,
                        "Comma-separated list of hostname:port pairs")
    flags.DEFINE_string("worker_hosts", worker_hosts,
                        "Comma-separated list of hostname:port pairs")
    # end of tport snippet 2

    # Training flags - feel free to play with these!
    flags.DEFINE_integer("batch", 256, "Batch size")
    flags.DEFINE_integer("time", 1, "Number of frames per sample")
    flags.DEFINE_integer("steps_per_epoch", 10000, "Number of training steps per epoch")
    flags.DEFINE_integer("nb_epochs", 200, "Number of epochs")

    # Model flags - feel free to play with these!
    flags.DEFINE_float("dropout_rate1", .2, "Dropout rate on first dropout layer")
    flags.DEFINE_float("dropout_rate2", .5, "Dropout rate on second dropout layer")
    flags.DEFINE_float("starter_lr", 1e-6, "Starter learning rate. Exponential decay is applied")
    flags.DEFINE_integer("fc_dim", 512, "Size of the dense layer")
    flags.DEFINE_boolean("nogood", False, "Ignore `goods` filters.")

    # tport snippet 3: configure distributed environment
    def device_and_target():
        # If FLAGS.job_name is not set, we're running single-machine TensorFlow.
        # Don't set a device.
        if FLAGS.job_name is None:
            print("Running single-machine training")
            return (None, "")

        # Otherwise we're running distributed TensorFlow.
print("Running distributed training") if FLAGS.task_index is None or FLAGS.task_index == "": raise ValueError("Must specify an explicit `task_index`") if FLAGS.ps_hosts is None or FLAGS.ps_hosts == "": raise ValueError("Must specify an explicit `ps_hosts`") if FLAGS.worker_hosts is None or FLAGS.worker_hosts == "": raise ValueError("Must specify an explicit `worker_hosts`") cluster_spec = tf.train.ClusterSpec({ "ps": FLAGS.ps_hosts.split(","), "worker": FLAGS.worker_hosts.split(","), }) server = tf.train.Server( cluster_spec, job_name=FLAGS.job_name, task_index=FLAGS.task_index) if FLAGS.job_name == "ps": server.join() worker_device = "/job:worker/task:{}".format(FLAGS.task_index) # The device setter will automatically place Variables ops on separate # parameter servers (ps). The non-Variable ops will be placed on the workers. return ( tf.train.replica_device_setter( worker_device=worker_device, cluster=cluster_spec), server.target, ) device, target = device_and_target() # end of tport snippet 3 print(FLAGS.logs_dir) print(FLAGS.train_data_dir) if FLAGS.logs_dir is None or FLAGS.logs_dir == "": raise ValueError("Must specify an explicit `logs_dir`") if FLAGS.train_data_dir is None or FLAGS.train_data_dir == "": raise ValueError("Must specify an explicit `train_data_dir`") # if FLAGS.val_data_dir is None or FLAGS.val_data_dir == "": # raise ValueError("Must specify an explicit `val_data_dir`") TIME_LEN = 1 #1 video frame. Other not supported. # Define graph with tf.device(device): X = tf.placeholder(tf.float32, [FLAGS.batch, 3, 160, 320], name="X") Y = tf.placeholder(tf.float32,[FLAGS.batch,1], name="Y") # angle only S = tf.placeholder(tf.float32,[FLAGS.batch,1], name="S") #speed predictions = get_model(X,FLAGS) steering_summary = tf.summary.image("green-is-predicted",render_steering_tf(X,Y,S,predictions)) # Adding numpy operation to graph. Adding image to summary loss = get_loss(predictions,Y) training_summary = tf.summary.scalar('Training_Loss', loss)#add to tboard #Batch generators gen_train = gen(FLAGS.train_data_dir, time_len=FLAGS.time, batch_size=FLAGS.batch, ignore_goods=FLAGS.nogood) global_step = tf.contrib.framework.get_or_create_global_step() learning_rate = tf.train.exponential_decay(FLAGS.starter_lr, global_step,1000, 0.96, staircase=True) train_step = ( tf.train.AdamOptimizer(learning_rate) .minimize(loss, global_step=global_step) ) def run_train_epoch(target,gen_train,FLAGS,epoch_index): """Restores the last checkpoint and runs a training epoch Inputs: - target: device setter for distributed work - FLAGS: - requires FLAGS.logs_dir from which the model will be restored. Note that whatever most recent checkpoint from that directory will be used. - requires FLAGS.steps_per_epoch - gen_train: training data generator - epoch_index: index of current epoch """ hooks=[tf.train.StopAtStepHook(last_step=FLAGS.steps_per_epoch*epoch_index)] # Increment number of required training steps i = 1 with tf.train.MonitoredTrainingSession(master=target, is_chief=(FLAGS.task_index == 0), checkpoint_dir=FLAGS.logs_dir, hooks = hooks) as sess: while not sess.should_stop(): batch_train = gen_train.next() feed_dict = {X: batch_train[0], Y: batch_train[1], S: batch_train[2] } variables = [loss, learning_rate, train_step] current_loss, lr, _ = sess.run(variables, feed_dict) print("Iteration %s - Batch loss: %s" % ((epoch_index)*FLAGS.steps_per_epoch + i,current_loss)) i+=1 for e in range(FLAGS.nb_epochs): run_train_epoch(target, gen_train, FLAGS, e)