def get_peer_latencies():
    """Return this peer's round-trip-time vector to every peer in the cluster.

    Writing V for the result on the peer of rank i: V[j] holds the RTT from
    i to j for every j != i, and V[i] is 0.

    The work is delegated to the ``kungfu_get_peer_latencies`` op, which is
    given the current cluster size.
    """
    # NOTE(review): another ``get_peer_latencies`` taking ``local_step`` is
    # defined further down; if both live in the same module the later one
    # replaces this one -- confirm which is intended.
    n_peers = current_cluster_size()
    return _op_lib.kungfu_get_peer_latencies(cluster_size=n_peers)
def get_neighbour_mask(edges):
    """Build the boolean neighbour vector of the current peer.

    On the peer of rank i the result v satisfies: v[j] is true when (i, j)
    is an edge of the MST, and false otherwise.

    Args:
        edges: edge description forwarded unchanged to the
            ``kungfu_get_neighbour_mask`` op (presumably the MST edge list;
            exact layout is defined by the op -- TODO confirm).

    Returns:
        The output of the ``kungfu_get_neighbour_mask`` op for this peer's
        rank and the current cluster size.
    """
    my_rank = current_rank()
    n_peers = current_cluster_size()
    return _op_lib.kungfu_get_neighbour_mask(
        edges,
        self_rank=my_rank,
        cluster_size=n_peers,
    )
def get_peer_latencies(local_step=None):
    """Return this peer's round-trip-time vector to every peer in the cluster.

    Writing V for the result on the peer of rank i: V[j] holds the RTT from
    i to j for every j != i, and V[i] is 0.

    Args:
        local_step: input forwarded to the ``kungfu_get_peer_latencies`` op.
            When omitted (``None``), a fresh non-trainable scalar int64
            ``tf.Variable`` initialised to zero is created and used instead.

    Returns:
        The output of the ``kungfu_get_peer_latencies`` op.
    """
    # FIXME: don't require input
    step_input = local_step
    if step_input is None:
        # Imported lazily so TensorFlow is only needed on this fallback path.
        import tensorflow as tf

        zero_scalar = tf.zeros([], tf.int64)
        step_input = tf.Variable(zero_scalar, trainable=False)

    n_peers = current_cluster_size()
    return _op_lib.kungfu_get_peer_latencies(step_input, cluster_size=n_peers)
def _parse_schedule(schedule, batch_size, num_train):
    """Parse a schedule string into global-step boundaries and fractions.

    The schedule has the form ``e1:f1,e2:f2,e3:f3``: comma-separated tokens,
    each made of an integer epoch and a float fraction joined by a colon.
    Every epoch is converted to a global step with
    ``int(epoch * num_train / (batch_size * current_cluster_size()))``.

    Args:
        schedule: schedule string in the format described above.
        batch_size: batch size used in the epoch -> step conversion
            (presumably per peer, as it is multiplied by the cluster size
            -- TODO confirm).
        num_train: training-set size used in the epoch -> step conversion.

    Returns:
        A pair ``(steps, fractions)`` of equal-length tuples, in the order
        the tokens appear in ``schedule``.

    Raises:
        ValueError: if an epoch is not an integer literal or a fraction is
            not a float literal (this includes empty tokens).
        IndexError: if a token contains no ``:`` separator.
    """
    print("Num train: " + str(num_train))
    print("Batch size: " + str(batch_size))

    # Denominator of the epoch -> step conversion; it is the same for every
    # token, so it is computed once instead of once per token.
    samples_per_step = batch_size * current_cluster_size()

    def to_pair(token):
        # Split once and index, so extra ":" fields are ignored and a missing
        # one raises IndexError, exactly as before.
        fields = token.split(":")
        step = int(int(fields[0]) * num_train / samples_per_step)
        return step, float(fields[1])

    pairs = [to_pair(token) for token in schedule.split(",")]
    steps, fractions = zip(*pairs)

    print("Steps: " + str(steps))
    print("Fractions: " + str(fractions))
    return steps, fractions