def __init__(self, train_set, dev_set, test_set, numcep, numcontext,
             tower_feeder_count=-1, threads_per_queue=2):
    self.train = train_set
    self.dev = dev_set
    self.test = test_set
    self.sets = [train_set, dev_set, test_set]
    self.numcep = numcep
    self.numcontext = numcontext
    self.tower_feeder_count = max(len(get_available_gpus()), 1) if tower_feeder_count < 0 else tower_feeder_count
    self.threads_per_queue = threads_per_queue

    self.ph_x = tf.placeholder(tf.float32, [None, numcep + (2 * numcep * numcontext)])
    self.ph_x_length = tf.placeholder(tf.int32, [])
    self.ph_y = tf.placeholder(tf.int32, [None])
    self.ph_y_length = tf.placeholder(tf.int32, [])
    self.ph_batch_size = tf.placeholder(tf.int32, [])
    self.ph_queue_selector = tf.placeholder(tf.int32, name='Queue_Selector')

    self._tower_feeders = [_TowerFeeder(self, i) for i in range(self.tower_feeder_count)]
def __init__(self, train_set, dev_set, test_set, numcep, numcontext, alphabet,
             tower_feeder_count=-1, threads_per_queue=2):
    self.train = train_set
    self.dev = dev_set
    self.test = test_set
    self.sets = [train_set, dev_set, test_set]
    self.numcep = numcep
    self.numcontext = numcontext
    self.tower_feeder_count = max(len(get_available_gpus()), 1) if tower_feeder_count < 0 else tower_feeder_count
    self.threads_per_queue = threads_per_queue

    self.ph_x = tf.placeholder(tf.float32, [None, numcep + (2 * numcep * numcontext)])
    self.ph_x_length = tf.placeholder(tf.int32, [])
    self.ph_y = tf.placeholder(tf.int32, [None])
    self.ph_y_length = tf.placeholder(tf.int32, [])
    self.ph_batch_size = tf.placeholder(tf.int32, [])
    self.ph_queue_selector = tf.placeholder(tf.int32, name='Queue_Selector')

    self._tower_feeders = [_TowerFeeder(self, i, alphabet) for i in range(self.tower_feeder_count)]
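# Worked check of the ph_x placeholder width used in the two constructors
# above (the example values 26 and 9 match the n_input/n_context constants in
# the configs further below): each input row is the current frame's MFCC
# features plus numcontext past and future context frames.
numcep, numcontext = 26, 9
input_width = numcep + (2 * numcep * numcontext)
assert input_width == 494  # 26 + 2 * 26 * 9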
def check_so(soname):
    """
    Verify that we do have the 'soname' lib present in the system, and that it
    can be loaded.
    """
    if len(get_available_gpus()) == 0:
        return None

    # Try to force-load the lib; this will fail if the lib is not there :)
    try:
        lib = cdll.LoadLibrary(soname)
        print("INFO: Found so as", lib)
        assert lib.__class__.__name__ == 'CDLL'
        assert lib._name == soname
        return True
    except OSError as ex:
        print("WARNING:", ex)
        return False
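# Usage sketch for check_so() above; 'libcudnn.so' is an illustrative soname,
# not one the function requires. None means no GPU was found and the check was
# skipped; True/False report whether the library could actually be loaded.
status = check_so('libcudnn.so')
if status is None:
    print('No GPU detected; shared-object check skipped.')
elif not status:
    print('GPU present, but the library could not be loaded.')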
def _get_device_count(self):
    available_gpus = get_available_gpus()
    return max(len(available_gpus), 1)
def initialize_globals():
    c = AttrDict()

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    # Set default checkpoint dir
    if not FLAGS.checkpoint_dir:
        FLAGS.checkpoint_dir = xdg.save_data_path(os.path.join('deepspeech', 'checkpoints'))

    if FLAGS.load not in ['last', 'best', 'init', 'auto', 'transfer']:
        FLAGS.load = 'auto'

    # Set default summary dir
    if not FLAGS.summary_dir:
        FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech', 'summaries'))

    # Standard session configuration that'll be used for all new sessions.
    c.session_config = tfv1.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=FLAGS.log_placement,
                                        inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
                                        intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads,
                                        gpu_options=tfv1.GPUOptions(allow_growth=FLAGS.use_allow_growth))

    # CPU device
    c.cpu_device = '/cpu:0'

    # Available GPU devices
    c.available_devices = get_available_gpus(c.session_config)

    # If there is no GPU available, we fall back to CPU based operation
    if not c.available_devices:
        c.available_devices = [c.cpu_device]

    if FLAGS.utf8:
        c.alphabet = UTF8Alphabet()
    else:
        c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))

    # Geometric Constants
    # ===================

    # For an explanation of the meaning of the geometric constants, please refer to
    # doc/Geometry.md

    # Number of MFCC features
    c.n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    c.n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    c.n_hidden = FLAGS.n_hidden
    c.n_hidden_1 = c.n_hidden
    c.n_hidden_2 = c.n_hidden
    c.n_hidden_5 = c.n_hidden

    # LSTM cell state dimension
    c.n_cell_dim = c.n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    c.n_hidden_3 = c.n_cell_dim

    # Units in the sixth layer = number of characters in the target language plus one
    c.n_hidden_6 = c.alphabet.size() + 1  # +1 for CTC blank label

    # Size of audio window in samples
    if (FLAGS.feature_win_len * FLAGS.audio_sample_rate) % 1000 != 0:
        log_error('--feature_win_len value ({}) in milliseconds ({}) multiplied '
                  'by --audio_sample_rate value ({}) must be an integer value. Adjust '
                  'your --feature_win_len value or resample your audio accordingly.'
                  ''.format(FLAGS.feature_win_len, FLAGS.feature_win_len / 1000,
                            FLAGS.audio_sample_rate))
        sys.exit(1)

    c.audio_window_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_len / 1000)

    # Stride for feature computations in samples
    if (FLAGS.feature_win_step * FLAGS.audio_sample_rate) % 1000 != 0:
        log_error('--feature_win_step value ({}) in milliseconds ({}) multiplied '
                  'by --audio_sample_rate value ({}) must be an integer value. Adjust '
                  'your --feature_win_step value or resample your audio accordingly.'
                  ''.format(FLAGS.feature_win_step, FLAGS.feature_win_step / 1000,
                            FLAGS.audio_sample_rate))
        sys.exit(1)

    c.audio_step_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_step / 1000)

    if FLAGS.one_shot_infer:
        if not os.path.exists(FLAGS.one_shot_infer):
            log_error('Path specified in --one_shot_infer is not a valid file.')
            sys.exit(1)

    ConfigSingleton._config = c  # pylint: disable=protected-access
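# Worked example of the feature-window guard above, with illustrative values
# (16 kHz audio, 32 ms window, 20 ms stride; these numbers are examples, not
# defaults taken from the source):
audio_sample_rate = 16000
feature_win_len = 32    # 16000 * 32 % 1000 == 0 -> 512 samples per window
feature_win_step = 20   # 16000 * 20 % 1000 == 0 -> 320 samples per stride
assert (feature_win_len * audio_sample_rate) % 1000 == 0
assert (feature_win_step * audio_sample_rate) % 1000 == 0
# By contrast, a 25 ms window at 11025 Hz would be 275.625 samples
# (11025 * 25 / 1000), so initialize_globals() aborts rather than truncate.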
def initialize_globals():
    c = AttrDict()

    # CPU device
    c.cpu_device = '/cpu:0'

    # Available GPU devices
    c.available_devices = get_available_gpus()

    # If there is no GPU available, we fall back to CPU based operation
    if not c.available_devices:
        c.available_devices = [c.cpu_device]

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    # Set default checkpoint dir
    if not FLAGS.checkpoint_dir:
        FLAGS.checkpoint_dir = xdg.save_data_path(os.path.join('deepspeech', 'checkpoints'))

    if FLAGS.load not in ['last', 'best', 'init', 'auto']:
        FLAGS.load = 'auto'

    # Set default summary dir
    if not FLAGS.summary_dir:
        FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech', 'summaries'))

    # Standard session configuration that'll be used for all new sessions.
    c.session_config = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=FLAGS.log_placement,
                                      inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
                                      intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads)

    c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))

    # Geometric Constants
    # ===================

    # For an explanation of the meaning of the geometric constants, please refer to
    # doc/Geometry.md

    # Number of MFCC features
    c.n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    c.n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    c.n_hidden = FLAGS.n_hidden
    c.n_hidden_1 = c.n_hidden
    c.n_hidden_2 = c.n_hidden
    c.n_hidden_5 = c.n_hidden

    # LSTM cell state dimension
    c.n_cell_dim = c.n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    c.n_hidden_3 = c.n_cell_dim

    # Units in the sixth layer = number of characters in the target language plus one
    c.n_hidden_6 = c.alphabet.size() + 1  # +1 for CTC blank label

    # Size of audio window in samples
    c.audio_window_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_len / 1000)

    # Stride for feature computations in samples
    c.audio_step_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_step / 1000)

    if FLAGS.one_shot_infer:
        if not os.path.exists(FLAGS.one_shot_infer):
            log_error('Path specified in --one_shot_infer is not a valid file.')
            exit(1)

    ConfigSingleton._config = c  # pylint: disable=protected-access
def initialize_globals():
    c = AttrDict()

    # ps and worker hosts required for p2p cluster setup
    FLAGS.ps_hosts = list(filter(len, FLAGS.ps_hosts.split(',')))
    FLAGS.worker_hosts = list(filter(len, FLAGS.worker_hosts.split(',')))

    # Create a cluster from the parameter server and worker hosts.
    c.cluster = tf.train.ClusterSpec({'ps': FLAGS.ps_hosts, 'worker': FLAGS.worker_hosts})

    # The absolute number of computing nodes - regardless of cluster or single mode
    num_workers = max(1, len(FLAGS.worker_hosts))

    # If replica numbers are negative, we multiply their absolute values with the number of workers
    if FLAGS.replicas < 0:
        FLAGS.replicas = num_workers * -FLAGS.replicas
    if FLAGS.replicas_to_agg < 0:
        FLAGS.replicas_to_agg = num_workers * -FLAGS.replicas_to_agg

    # The device path base for this node
    c.worker_device = '/job:%s/task:%d' % (FLAGS.job_name, FLAGS.task_index)

    # This node's CPU device
    c.cpu_device = c.worker_device + '/cpu:0'

    # This node's available GPU devices
    c.available_devices = [c.worker_device + gpu for gpu in get_available_gpus()]

    # If there is no GPU available, we fall back to CPU based operation
    if 0 == len(c.available_devices):
        c.available_devices = [c.cpu_device]

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    # Set default checkpoint dir
    if len(FLAGS.checkpoint_dir) == 0:
        FLAGS.checkpoint_dir = xdg.save_data_path(os.path.join('deepspeech', 'checkpoints'))

    if FLAGS.benchmark_steps > 0:
        FLAGS.checkpoint_dir = None

    # Set default summary dir
    if len(FLAGS.summary_dir) == 0:
        FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech', 'summaries'))

    # Standard session configuration that'll be used for all new sessions.
    c.session_config = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=FLAGS.log_placement,
                                      inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
                                      intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads)

    c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))

    # Geometric Constants
    # ===================

    # For an explanation of the meaning of the geometric constants, please refer to
    # doc/Geometry.md

    # Number of MFCC features
    c.n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    c.n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    c.n_hidden = FLAGS.n_hidden
    c.n_hidden_1 = c.n_hidden
    c.n_hidden_2 = c.n_hidden
    c.n_hidden_5 = c.n_hidden

    # LSTM cell state dimension
    c.n_cell_dim = c.n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    c.n_hidden_3 = c.n_cell_dim

    # Units in the sixth layer = number of characters in the target language plus one
    c.n_hidden_6 = c.alphabet.size() + 1  # +1 for CTC blank label

    # Queues that are used to gracefully stop parameter servers.
    # Each queue stands for one ps. A finishing worker sends a token to each queue before joining/quitting.
    # Each ps will dequeue as many tokens as there are workers before joining/quitting.
    # This ensures parameter servers won't quit, if still required by at least one worker and
    # also won't wait forever (like with a standard `server.join()`).
    done_queues = []
    for i, ps in enumerate(FLAGS.ps_hosts):
        # Queues are hosted by their respective owners
        with tf.device('/job:ps/task:%d' % i):
            done_queues.append(tf.FIFOQueue(1, tf.int32, shared_name=('queue%i' % i)))

    # Placeholder to pass in the worker's index as token
    c.token_placeholder = tf.placeholder(tf.int32)

    # Enqueue operations for each parameter server
    c.done_enqueues = [queue.enqueue(c.token_placeholder) for queue in done_queues]

    # Dequeue operations for each parameter server
    c.done_dequeues = [queue.dequeue() for queue in done_queues]

    if len(FLAGS.one_shot_infer) > 0:
        FLAGS.train = False
        FLAGS.test = False
        FLAGS.export_dir = ''
        if not os.path.exists(FLAGS.one_shot_infer):
            log_error('Path specified in --one_shot_infer is not a valid file.')
            exit(1)

    # Determine if we are the chief worker
    c.is_chief = len(FLAGS.worker_hosts) == 0 or (FLAGS.task_index == 0 and FLAGS.job_name == 'worker')

    ConfigSingleton._config = c
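# A sketch of how the token protocol described in the comments above might be
# driven at runtime. This helper is hypothetical (not in the source); only the
# queue ops, the token placeholder, and the protocol itself come from the
# config built above.
def shutdown_gracefully(session, c, num_workers, is_ps):
    if is_ps:
        # Each ps drains one token per worker from its own queue before
        # quitting, instead of blocking forever in a plain server.join().
        for _ in range(num_workers):
            session.run(c.done_dequeues[FLAGS.task_index])
    else:
        # A finishing worker hands its index as a token to every ps queue.
        for enqueue in c.done_enqueues:
            session.run(enqueue, feed_dict={c.token_placeholder: FLAGS.task_index})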
def initialize_globals():
    c = AttrDict()

    # CPU device
    c.cpu_device = '/cpu:0'

    # Available GPU devices
    c.available_devices = get_available_gpus()

    # If there is no GPU available, we fall back to CPU based operation
    if 0 == len(c.available_devices):
        c.available_devices = [c.cpu_device]

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    # Set default checkpoint dir
    if len(FLAGS.checkpoint_dir) == 0:
        FLAGS.checkpoint_dir = xdg.save_data_path(os.path.join('deepspeech', 'checkpoints'))

    if FLAGS.load not in ['last', 'best', 'init', 'auto']:
        FLAGS.load = 'auto'

    # Set default summary dir
    if len(FLAGS.summary_dir) == 0:
        FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech', 'summaries'))

    # Standard session configuration that'll be used for all new sessions.
    c.session_config = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=FLAGS.log_placement,
                                      inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
                                      intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads)

    c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))

    # Geometric Constants
    # ===================

    # For an explanation of the meaning of the geometric constants, please refer to
    # doc/Geometry.md

    # Number of MFCC features
    c.n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    c.n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    c.n_hidden = FLAGS.n_hidden
    c.n_hidden_1 = c.n_hidden
    c.n_hidden_2 = c.n_hidden
    c.n_hidden_5 = c.n_hidden

    # LSTM cell state dimension
    c.n_cell_dim = c.n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    c.n_hidden_3 = c.n_cell_dim

    # Units in the sixth layer = number of characters in the target language plus one
    c.n_hidden_6 = c.alphabet.size() + 1  # +1 for CTC blank label

    if len(FLAGS.one_shot_infer) > 0:
        FLAGS.train = False
        FLAGS.test = False
        FLAGS.export_dir = ''
        if not os.path.exists(FLAGS.one_shot_infer):
            log_error('Path specified in --one_shot_infer is not a valid file.')
            exit(1)

    ConfigSingleton._config = c
def initialize_globals():
    c = AttrDict()

    # The absolute number of computing nodes - regardless of cluster or single mode
    num_workers = 1

    # The device path base for this node
    c.worker_device = '/job:%s/task:%d' % ('localhost', 0)

    # This node's CPU device
    c.cpu_device = c.worker_device + '/cpu:0'

    # This node's available GPU devices
    c.available_devices = [c.worker_device + gpu for gpu in get_available_gpus()]

    # If there is no GPU available, we fall back to CPU based operation
    if 0 == len(c.available_devices):
        c.available_devices = [c.cpu_device]

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    # Set default checkpoint dir
    if len(FLAGS.checkpoint_dir) == 0:
        FLAGS.checkpoint_dir = xdg.save_data_path(os.path.join('deepspeech', 'checkpoints'))

    # Set default summary dir
    if len(FLAGS.summary_dir) == 0:
        FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech', 'summaries'))

    # Standard session configuration that'll be used for all new sessions.
    c.session_config = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=FLAGS.log_placement,
                                      inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
                                      intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads)

    c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))

    # Geometric Constants
    # ===================

    # For an explanation of the meaning of the geometric constants, please refer to
    # doc/Geometry.md

    # Number of MFCC features
    c.n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    c.n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    c.n_hidden = FLAGS.n_hidden
    c.n_hidden_1 = c.n_hidden
    c.n_hidden_2 = c.n_hidden
    c.n_hidden_5 = c.n_hidden

    # LSTM cell state dimension
    c.n_cell_dim = c.n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    c.n_hidden_3 = c.n_cell_dim

    # Units in the sixth layer = number of characters in the target language plus one
    c.n_hidden_6 = c.alphabet.size() + 1  # +1 for CTC blank label

    # Determine if we are the chief worker
    c.is_chief = True

    ConfigSingleton._config = c
def initialize_globals():
    c = AttrDict()

    # CPU device
    c.cpu_device = '/cpu:0'

    # Available GPU devices
    c.available_devices = get_available_gpus()

    # If there is no GPU available, we fall back to CPU based operation
    if not c.available_devices:
        c.available_devices = [c.cpu_device]

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    # Set default checkpoint dir
    if not FLAGS.checkpoint_dir:
        FLAGS.checkpoint_dir = xdg.save_data_path(os.path.join('deepspeech', 'checkpoints'))

    if FLAGS.load not in ['last', 'best', 'init', 'auto']:
        FLAGS.load = 'auto'

    # Set default summary dir
    if not FLAGS.summary_dir:
        FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech', 'summaries'))

    c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))

    # Geometric Constants
    # ===================

    # For an explanation of the meaning of the geometric constants, please refer to
    # doc/Geometry.md

    # Number of MFCC features
    c.n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    c.n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    c.n_hidden = FLAGS.n_hidden
    c.n_hidden_1 = c.n_hidden
    c.n_hidden_2 = c.n_hidden
    c.n_hidden_5 = c.n_hidden

    # LSTM cell state dimension
    c.n_cell_dim = c.n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    c.n_hidden_3 = c.n_cell_dim

    # Units in the sixth layer = number of characters in the target language plus one
    c.n_hidden_6 = c.alphabet.size() + 1  # +1 for CTC blank label

    # Size of audio window in samples
    c.audio_window_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_len / 1000)

    # Stride for feature computations in samples
    c.audio_step_samples = FLAGS.audio_sample_rate * (FLAGS.feature_win_step / 1000)

    if FLAGS.one_shot_infer:
        if not os.path.exists(FLAGS.one_shot_infer):
            log_error('Path specified in --one_shot_infer is not a valid file.')
            exit(1)

    ConfigSingleton._config = c  # pylint: disable=protected-access
def initialize_globals():
    c = AttrDict()

    # ps and worker hosts required for p2p cluster setup
    FLAGS.ps_hosts = list(filter(len, FLAGS.ps_hosts.split(',')))
    FLAGS.worker_hosts = list(filter(len, FLAGS.worker_hosts.split(',')))

    # Create a cluster from the parameter server and worker hosts.
    c.cluster = tf.train.ClusterSpec({'ps': FLAGS.ps_hosts, 'worker': FLAGS.worker_hosts})

    # The absolute number of computing nodes - regardless of cluster or single mode
    num_workers = max(1, len(FLAGS.worker_hosts))

    # If replica numbers are negative, we multiply their absolute values with the number of workers
    if FLAGS.replicas < 0:
        FLAGS.replicas = num_workers * -FLAGS.replicas
    if FLAGS.replicas_to_agg < 0:
        FLAGS.replicas_to_agg = num_workers * -FLAGS.replicas_to_agg

    # The device path base for this node
    c.worker_device = '/job:%s/task:%d' % (FLAGS.job_name, FLAGS.task_index)

    # This node's CPU device
    c.cpu_device = c.worker_device + '/cpu:0'

    # This node's available GPU devices
    c.available_devices = [c.worker_device + gpu for gpu in get_available_gpus()]

    # If there is no GPU available, we fall back to CPU based operation
    if 0 == len(c.available_devices):
        c.available_devices = [c.cpu_device]

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    # Set default checkpoint dir
    if len(FLAGS.checkpoint_dir) == 0:
        FLAGS.checkpoint_dir = xdg.save_data_path(os.path.join('deepspeech', 'checkpoints'))

    # Set default summary dir
    if len(FLAGS.summary_dir) == 0:
        FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech', 'summaries'))

    # Standard session configuration that'll be used for all new sessions.
    c.session_config = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=FLAGS.log_placement,
                                      inter_op_parallelism_threads=FLAGS.inter_op_parallelism_threads,
                                      intra_op_parallelism_threads=FLAGS.intra_op_parallelism_threads)

    c.alphabet = Alphabet(os.path.abspath(FLAGS.alphabet_config_path))

    # Geometric Constants
    # ===================

    # For an explanation of the meaning of the geometric constants, please refer to
    # doc/Geometry.md

    # Number of MFCC features
    c.n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    c.n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    c.n_hidden = FLAGS.n_hidden
    c.n_hidden_1 = c.n_hidden
    c.n_hidden_2 = c.n_hidden
    c.n_hidden_5 = c.n_hidden

    # LSTM cell state dimension
    c.n_cell_dim = c.n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    c.n_hidden_3 = c.n_cell_dim

    # Units in the sixth layer = number of characters in the target language plus one
    c.n_hidden_6 = c.alphabet.size() + 1  # +1 for CTC blank label

    # Queues that are used to gracefully stop parameter servers.
    # Each queue stands for one ps. A finishing worker sends a token to each queue before joining/quitting.
    # Each ps will dequeue as many tokens as there are workers before joining/quitting.
    # This ensures parameter servers won't quit, if still required by at least one worker and
    # also won't wait forever (like with a standard `server.join()`).
    done_queues = []
    for i, ps in enumerate(FLAGS.ps_hosts):
        # Queues are hosted by their respective owners
        with tf.device('/job:ps/task:%d' % i):
            done_queues.append(tf.FIFOQueue(1, tf.int32, shared_name=('queue%i' % i)))

    # Placeholder to pass in the worker's index as token
    c.token_placeholder = tf.placeholder(tf.int32)

    # Enqueue operations for each parameter server
    c.done_enqueues = [queue.enqueue(c.token_placeholder) for queue in done_queues]

    # Dequeue operations for each parameter server
    c.done_dequeues = [queue.dequeue() for queue in done_queues]

    if len(FLAGS.one_shot_infer) > 0:
        FLAGS.train = False
        FLAGS.test = False
        FLAGS.export_dir = ''
        if not os.path.exists(FLAGS.one_shot_infer):
            log_error('Path specified in --one_shot_infer is not a valid file.')
            exit(1)

    # Determine if we are the chief worker
    c.is_chief = len(FLAGS.worker_hosts) == 0 or (FLAGS.task_index == 0 and FLAGS.job_name == 'worker')

    ConfigSingleton._config = c
def initialize_globals():
    # ps and worker hosts required for p2p cluster setup
    FLAGS.ps_hosts = list(filter(len, FLAGS.ps_hosts.split(',')))
    FLAGS.worker_hosts = list(filter(len, FLAGS.worker_hosts.split(',')))

    # Determine if we are the chief worker
    global is_chief
    is_chief = len(FLAGS.worker_hosts) == 0 or (FLAGS.task_index == 0 and FLAGS.job_name == 'worker')

    # The absolute number of computing nodes - regardless of cluster or single mode
    global num_workers
    num_workers = max(1, len(FLAGS.worker_hosts))

    # Create a cluster from the parameter server and worker hosts.
    global cluster
    cluster = tf.train.ClusterSpec({'ps': FLAGS.ps_hosts, 'worker': FLAGS.worker_hosts})

    # If replica numbers are negative, we multiply their absolute values with the number of workers
    if FLAGS.replicas < 0:
        FLAGS.replicas = num_workers * -FLAGS.replicas
    if FLAGS.replicas_to_agg < 0:
        FLAGS.replicas_to_agg = num_workers * -FLAGS.replicas_to_agg

    # The device path base for this node
    global worker_device
    worker_device = '/job:%s/task:%d' % (FLAGS.job_name, FLAGS.task_index)

    # This node's CPU device
    global cpu_device
    cpu_device = worker_device + '/cpu:0'

    # This node's available GPU devices
    global available_devices
    available_devices = [worker_device + gpu for gpu in get_available_gpus()]

    # If there is no GPU available, we fall back to CPU based operation
    if 0 == len(available_devices):
        available_devices = [cpu_device]

    # Set default dropout rates
    if FLAGS.dropout_rate2 < 0:
        FLAGS.dropout_rate2 = FLAGS.dropout_rate
    if FLAGS.dropout_rate3 < 0:
        FLAGS.dropout_rate3 = FLAGS.dropout_rate
    if FLAGS.dropout_rate6 < 0:
        FLAGS.dropout_rate6 = FLAGS.dropout_rate

    global dropout_rates
    dropout_rates = [FLAGS.dropout_rate,
                     FLAGS.dropout_rate2,
                     FLAGS.dropout_rate3,
                     FLAGS.dropout_rate4,
                     FLAGS.dropout_rate5,
                     FLAGS.dropout_rate6]

    global no_dropout
    no_dropout = [0.0] * 6

    # Set default checkpoint dir
    if len(FLAGS.checkpoint_dir) == 0:
        FLAGS.checkpoint_dir = xdg.save_data_path(os.path.join('deepspeech', 'checkpoints'))

    # Set default summary dir
    if len(FLAGS.summary_dir) == 0:
        FLAGS.summary_dir = xdg.save_data_path(os.path.join('deepspeech', 'summaries'))

    # Standard session configuration that'll be used for all new sessions.
    global session_config
    session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=FLAGS.log_placement)

    # Geometric Constants
    # ===================

    # For an explanation of the meaning of the geometric constants, please refer to
    # doc/Geometry.md

    # Number of MFCC features
    global n_input
    n_input = 26  # TODO: Determine this programmatically from the sample rate

    # The number of frames in the context
    global n_context
    n_context = 9  # TODO: Determine the optimal value using a validation data set

    # Number of units in hidden layers
    global n_hidden
    n_hidden = FLAGS.n_hidden

    global n_hidden_1
    n_hidden_1 = n_hidden

    global n_hidden_2
    n_hidden_2 = n_hidden

    global n_hidden_5
    n_hidden_5 = n_hidden

    # LSTM cell state dimension
    global n_cell_dim
    n_cell_dim = n_hidden

    # The number of units in the third layer, which feeds in to the LSTM
    global n_hidden_3
    n_hidden_3 = 2 * n_cell_dim

    # The number of characters in the target language plus one
    global n_character
    n_character = 29  # TODO: Determine if this should be extended with other punctuation

    # The number of units in the sixth layer
    global n_hidden_6
    n_hidden_6 = n_character

    # Assign default values for standard deviation
    for var in ['b1', 'h1', 'b2', 'h2', 'b3', 'h3', 'b5', 'h5', 'b6', 'h6']:
        val = getattr(FLAGS, '%s_stddev' % var)
        if val is None:
            setattr(FLAGS, '%s_stddev' % var, FLAGS.default_stddev)

    # Queues that are used to gracefully stop parameter servers.
    # Each queue stands for one ps. A finishing worker sends a token to each queue before joining/quitting.
    # Each ps will dequeue as many tokens as there are workers before joining/quitting.
    # This ensures parameter servers won't quit, if still required by at least one worker and
    # also won't wait forever (like with a standard `server.join()`).
    global done_queues
    done_queues = []
    for i, ps in enumerate(FLAGS.ps_hosts):
        # Queues are hosted by their respective owners
        with tf.device('/job:ps/task:%d' % i):
            done_queues.append(tf.FIFOQueue(1, tf.int32, shared_name=('queue%i' % i)))

    # Placeholder to pass in the worker's index as token
    global token_placeholder
    token_placeholder = tf.placeholder(tf.int32)

    # Enqueue operations for each parameter server
    global done_enqueues
    done_enqueues = [queue.enqueue(token_placeholder) for queue in done_queues]

    # Dequeue operations for each parameter server
    global done_dequeues
    done_dequeues = [queue.dequeue() for queue in done_queues]
def main():
    # get_available_gpus() returns a list of device names; we only need the count
    num_gpus = len(get_available_gpus())
    print('Number of GPUs available: ' + str(num_gpus))

    conf = load_configuration(CONFIGURATION_FILENAME)
    main_conf = conf['pareto_configuration']
    x_flag = main_conf['x_axis']
    max_x_power = main_conf['max_x_power']
    min_x_power = main_conf['min_x_power']
    y_flag = main_conf['y_axis']
    coeff_delta = 0.01
    delta = 1
    partial_exec_command = main_conf['exec_command']
    success_set = {}
    failure_set = {}
    ended_dict = {}
    success_json_filename = main_conf['success_json']
    failure_json_filename = main_conf['failure_json']
    ended_json_filename = main_conf['ended_json']
    batch_axis = xAxis(base=2, initial_power=min_x_power, last_power=max_x_power)
    cwd = main_conf['cwd_command']
    post_exec_command = main_conf['post_exec_command']

    # Bisection bounds on the y axis: p tracks the best known success,
    # r the lowest known failure
    p = 0
    r = int(main_conf['max_y'])
    q = (p + r) // 2

    if os.path.isfile(success_json_filename):
        with open(success_json_filename) as json_read:
            success_set = json.load(json_read)

    if os.path.isfile(failure_json_filename):
        with open(failure_json_filename) as json_read:
            failure_set = json.load(json_read)

    for current_batch in batch_axis:
        real_current_batch = int(str(current_batch)) * num_gpus
        if str(real_current_batch) in ended_dict:
            continue
        if str(real_current_batch) in success_set:
            p = int(float(success_set[str(real_current_batch)]))
        if str(real_current_batch) in failure_set:
            r = int(float(failure_set[str(real_current_batch)]))
        delta = 1 + math.ceil(p * coeff_delta)

        while (r - p) > delta:
            q = (p + r) // 2
            exec_command = (partial_exec_command + ' ' + y_flag + ' ' + str(int(q)) +
                            ' ' + x_flag + ' ' + str(current_batch))
            print("Current iteration: Batch size: " + str(real_current_batch) +
                  " Current number of hidden layers: " + str(q))
            print("Current command : " + exec_command)
            # time.sleep(120)
            print('start')
            # proc = Popen(exec_command, shell=True, stdout=PIPE, cwd=cwd)
            proc = Popen(exec_command, shell=True, cwd=cwd)
            # stdout = proc.stdout.readlines()
            proc.wait()
            return_code = proc.returncode
            subprocess.call(post_exec_command, shell=True, executable="/bin/bash")

            if int(return_code) == 0:
                success = True
                p = q + 1
                # Re-read the success set in case another process updated it
                if os.path.isfile(success_json_filename):
                    with open(success_json_filename) as json_read:
                        success_set = json.load(json_read)
                if str(real_current_batch) in success_set:
                    # Overwrite the highest succeeded number of hidden layers
                    if int(float(success_set[str(real_current_batch)])) < int(float(q)):
                        success_set[str(real_current_batch)] = q
                else:
                    success_set[str(real_current_batch)] = q
                with open(success_json_filename, 'w') as json_file:
                    json_file.write(json.dumps(success_set))
            else:
                success = False
                r = q - 1
                failure_set[str(real_current_batch)] = q
                # Re-read the failure set in case another process updated it
                if os.path.isfile(failure_json_filename):
                    with open(failure_json_filename) as json_read:
                        failure_set = json.load(json_read)
                if str(real_current_batch) in failure_set:
                    # Overwrite the lowest failed number of hidden layers
                    if int(float(failure_set[str(real_current_batch)])) > int(float(q)):
                        failure_set[str(real_current_batch)] = q
                else:
                    failure_set[str(real_current_batch)] = q
                with open(failure_json_filename, 'w') as json_file:
                    json_file.write(json.dumps(failure_set))

            print("Last iteration success: " + str(success) + " Batch size: " +
                  str(real_current_batch) + " N hidden layer: " + str(q))
            delta = 1 + math.ceil(p * coeff_delta)

        if os.path.isfile(ended_json_filename):
            with open(ended_json_filename) as json_read:
                ended_dict = json.load(json_read)
        ended_dict[str(real_current_batch)] = 1
        with open(ended_json_filename, 'w') as json_file:
            json_file.write(json.dumps(ended_dict))

        # Reset the lower bound for the next batch size
        p = 0
        # r = q

    print(success_set)
    plt = create_graph(success_set)
    plt.savefig('success_set.png')
    plt.show()
    return 0
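# Stripped of the JSON checkpointing, the loop above is a per-batch-size
# bisection over the y axis for the largest value whose training run exits
# cleanly. A minimal restatement of that logic; largest_passing_y and trial
# are hypothetical stand-ins for the subprocess launch, not names from the
# source.
import math

def largest_passing_y(trial, max_y, coeff_delta=0.01):
    p, r = 0, max_y                              # success / failure bounds
    delta = 1 + math.ceil(p * coeff_delta)
    while (r - p) > delta:
        q = (p + r) // 2
        if trial(q):                             # zero exit code in the real loop
            p = q + 1                            # q succeeded; search higher
        else:
            r = q - 1                            # q failed; search lower
        delta = 1 + math.ceil(p * coeff_delta)   # tolerance widens as p grows
    return p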