async def main_async(data_path, num_top, num_sites, http_timeout):
    '''
    Program entry point: gather site info, then build and log two reports.

    The gather step, the site analysis and the header analysis are each
    wrapped in their own ``Timer`` context, and the whole run is wrapped in
    an outer one. Each timer's ``interval`` is logged once its block exits.

    Args:
        data_path: forwarded to ``Gatherer.get_site_info_async``.
        num_top: forwarded to ``Analyzer.get_header_report``.
        num_sites: forwarded to ``Gatherer.get_site_info_async``.
        http_timeout: forwarded to ``Gatherer.get_site_info_async``.
    '''
    with Timer() as program_timer:
        # Step 1: collect the raw site information.
        with Timer() as gather_timer:
            site_info = await Gatherer.get_site_info_async(
                data_path,
                num_sites,
                http_timeout,
            )
        logging.info(f'--- gather duration: {gather_timer.interval}')

        # Step 2: per-site report.
        with Timer() as analysis_timer:
            site_report = Analyzer.get_site_report(site_info)
        logging.info(
            f'--- site analysis duration: {analysis_timer.interval} seconds ---'
        )
        logging.info(f'--- site analysis report: --- \n{site_report}')

        # Step 3: header report (reuses the analysis timer name).
        with Timer() as analysis_timer:
            header_report = Analyzer.get_header_report(site_info, num_top)
        logging.info(
            f'--- header analysis duration: {analysis_timer.interval} seconds ---'
        )
        logging.info(f'--- header analysis report: --- \n{header_report}')

    logging.info(f'--- program duration: {program_timer.interval} seconds ---')
def t_cifs_domain():
    """Scenario: CIFS server modify/delete calls interleaved with interface and DNS changes."""
    nas = getNasServer()
    dns = getDns(nas)

    # Collect the interfaces whose parent is this NAS server and delete all
    # of them except the last one in the listing.
    attached = []
    for candidate in FI.list():
        if candidate().parent == nas():
            attached.append(candidate)
    for surplus in attached[:-1]:
        surplus().delete()

    interface = getInterface(nas)
    cifs = getCifsServer(nas)
    cifs().delete()
    Timer.add(30.0)

    cifs = getCifsServer(nas)
    interface().modify()
    cifs().modify()

    # with ethernet ports
    interface().modify()
    cifs().modify()

    # CIFS uses DNS
    dns().delete(is_error=True)
    interface().delete()

    # no interface
    cifs().delete(is_error=True)
    getInterface(nas)
    cifs().delete()
def __init__(self,
             n_in,
             n_hids,
             low_gru_size,
             n_out,
             inps=None,
             n_layers=None,
             dropout=None,
             seq_len=None,
             learning_rule=None,
             weight_initializer=None,
             bias_initializer=None,
             activ=None,
             use_cost_mask=True,
             noise=False,
             use_hint_layer=False,
             use_average=False,
             theano_function_mode=None,
             use_positional_encoding=False,
             use_inv_cost_mask=False,
             batch_size=32,
             use_noise=False,
             name=None):
    """Store the hyper-parameters, create the timers and reset the model state.

    Every argument except ``use_hint_layer`` is copied onto ``self`` under
    the same name; ``use_hint_layer`` is accepted but not stored here.
    ``self.eps`` is fixed at 1e-8 and ``self.out_layer_in`` is set to
    ``n_hids``.

    Raises:
        ValueError: if ``name`` is None.
    """
    self.n_in = n_in
    self.n_hids = n_hids
    self.n_out = n_out
    self.low_gru_size = low_gru_size
    self.n_layers = n_layers
    self.inps = inps
    self.noise = noise
    self.seq_len = seq_len
    self.use_cost_mask = use_cost_mask
    # Fixed: this used to read `selfearning_rule = learning_rule`, which
    # bound a throwaway local and left `self.learning_rule` undefined.
    self.learning_rule = learning_rule
    self.dropout = dropout
    self.use_average = use_average
    self.batch_size = batch_size
    self.use_noise = use_noise

    self.train_timer = Timer("Training function")
    self.grads_timer = Timer("Computing the grads")

    self.theano_function_mode = theano_function_mode
    self.weight_initializer = weight_initializer
    self.bias_initializer = bias_initializer
    self.use_positional_encoding = use_positional_encoding
    self.use_inv_cost_mask = use_inv_cost_mask
    self.eps = 1e-8
    self.activ = activ
    self.out_layer_in = self.n_hids

    if name is None:
        raise ValueError("name should not be empty.")

    # reset() runs before the name is stored, as in the original ordering.
    self.reset()
    self.name = name
def setUp(self):
    """Build a fresh Memory, two timers and a Cpu wired to them.

    The Cpu receives ``None`` as its second positional argument.
    """
    memory = Memory(0x1000)
    sound_timer = Timer(freq=60)
    delay_timer = Timer(freq=60)

    self.memory = memory
    self.sound_timer = sound_timer
    self.delay_timer = delay_timer
    self.cpu = Cpu(
        memory,
        None,
        delay_timer=delay_timer,
        sound_timer=sound_timer,
    )
def compute_time_train(model, loss_fun):
    """Time the forward and backward passes of `model` on a dummy mini-batch.

    The model is put in train mode. BatchNorm2d running statistics are
    cloned before the timing loop and written back afterwards. Both timers
    are reset once `cfg.PREC_TIME.WARMUP_ITER` iterations have run.

    Returns:
        Tuple ``(fw_timer.average_time, bw_timer.average_time)``.
    """
    model.train()

    # Dummy batch on the GPU, sized per device.
    im_size = cfg.TRAIN.IM_SIZE
    batch_size = int(cfg.TRAIN.BATCH_SIZE / cfg.NUM_GPUS)
    inputs = torch.rand(batch_size, 3, im_size, im_size).cuda(non_blocking=False)
    labels = torch.zeros(batch_size, dtype=torch.int64).cuda(non_blocking=False)

    # Snapshot the BatchNorm2d running stats so they can be restored later.
    bn_layers = [
        module for module in model.modules()
        if isinstance(module, torch.nn.BatchNorm2d)
    ]
    saved_stats = [
        [layer.running_mean.clone(), layer.running_var.clone()]
        for layer in bn_layers
    ]

    fw_timer = Timer()
    bw_timer = Timer()
    total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
    for cur_iter in range(total_iter):
        # Discard everything measured during warmup.
        if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
            fw_timer.reset()
            bw_timer.reset()

        # Forward pass; synchronize so the timer covers the GPU work.
        fw_timer.tic()
        _, preds, _ = model(inputs)
        loss = loss_fun(preds, labels)
        torch.cuda.synchronize()
        fw_timer.toc()

        # Backward pass.
        bw_timer.tic()
        loss.backward()
        torch.cuda.synchronize()
        bw_timer.toc()

    # Put the cached running stats back.
    for layer, (mean, var) in zip(bn_layers, saved_stats):
        layer.running_mean = mean
        layer.running_var = var

    return fw_timer.average_time, bw_timer.average_time
def apply_list(self, **kwargs):
    """Charge ``self.time`` to the Timer and list objects of ``self.type``.

    With no keyword filters every registered object is returned. With
    filters, an object is appended once for each filter whose attribute it
    has and whose value compares equal, so an object that matches several
    filters appears several times.
    """
    Timer.add(self.time)

    matches = []
    for candidate in OBJECTS[self.type].values():
        if not kwargs:
            matches.append(candidate)
            continue
        matches.extend(
            candidate
            for attr_name, wanted in kwargs.items()
            if hasattr(candidate, attr_name)
            and getattr(candidate, attr_name) == wanted
        )
    return matches
def __init__(self, max_iter):
    """Initialise the meter's timer, error meters and counters.

    Args:
        max_iter: stored unchanged as ``self.max_iter``.
    """
    self.max_iter = max_iter
    self.iter_timer = Timer()

    # Current minibatch errors (smoothed over a window)
    self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
    self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)

    # Min errors (over the full test set)
    self.min_top1_err = self.min_top5_err = 100.0

    # Number of misclassified examples, and samples seen
    self.num_top1_mis = self.num_top5_mis = 0
    self.num_samples = 0
def __init__(self):
    """Create the emulator components, wire them into a Cpu and start pygame."""
    memory = Memory(0x1000)
    display = Display(64, 32)
    delay_timer = Timer(freq=60)
    sound_timer = Timer(freq=60)

    self._memory = memory
    self._display = display
    self._delay_timer = delay_timer
    self._sound_timer = sound_timer

    # The sound device is driven by the sound timer.
    self._sound = Sound(sound_timer)
    self._cpu = Cpu(
        memory,
        display,
        delay_timer=delay_timer,
        sound_timer=sound_timer,
    )

    self._fps_time = datetime.now()
    pygame.init()
def t_cifs_skip_unjoin_reuse():
    """Scenario: modify a CIFS server twice, then delete and re-create it twice."""
    nas = getNasServer()
    getDns(nas)
    getInterface(nas)

    cifs = getCifsServer(nas)
    for _ in range(2):
        cifs().modify()
    cifs().delete()
    Timer.add(30.0)

    cifs = getCifsServer(nas)
    cifs().delete()
    Timer.add(30.0)

    getCifsServer(nas)
def __init__(self,
             n_in,
             n_out,
             bow_size,
             weight_initializer=None,
             use_index_jittering=False,
             bias_initializer=None,
             max_fact_len=12,
             max_seq_len=250,
             dropout=None,
             batch_size=None,
             learning_rule=None,
             share_inp_out_weights=False,
             n_steps=1,
             inps=None,
             use_noise=False,
             theano_function_mode=None,
             rng=None,
             name=None):
    """Record the hyper-parameters and create the timers and update dict.

    Every argument is copied onto ``self`` under the same name.
    ``self.eps`` is fixed at 1e-7 and ``self.updates`` starts as an empty
    dict. ``n_steps`` must be positive (checked with ``assert``).
    """
    # Sizes and sequence limits.
    self.n_in = n_in
    self.n_out = n_out
    self.bow_size = bow_size
    self.max_fact_len = max_fact_len
    self.max_seq_len = max_seq_len
    self.n_steps = n_steps
    self.batch_size = batch_size

    # Initialisation and regularisation options.
    self.weight_initializer = weight_initializer
    self.bias_initializer = bias_initializer
    self.share_inp_out_weights = share_inp_out_weights
    self.use_index_jittering = use_index_jittering
    self.dropout = dropout
    self.use_noise = use_noise
    self.rng = rng

    # Training plumbing.
    self.inps = inps
    self.learning_rule = learning_rule
    self.theano_function_mode = theano_function_mode
    self.eps = 1e-7
    self.name = name

    assert n_steps > 0, "Illegal value has been provided for n_steps."

    self.train_timer = Timer("Training function")
    self.grads_timer = Timer("Computing the grads")
    self.updates = {}
def __init__(self, epoch_iters):
    """Initialise the meter's timer, loss and error meters, and counters.

    Args:
        epoch_iters: stored as ``self.epoch_iters``; ``self.max_iter`` is
            ``cfg.OPTIM.MAX_EPOCH * epoch_iters``.
    """
    self.epoch_iters = epoch_iters
    self.max_iter = cfg.OPTIM.MAX_EPOCH * epoch_iters
    self.iter_timer = Timer()

    # Windowed losses with their running totals.
    self.desc_loss = ScalarMeter(cfg.LOG_PERIOD)
    self.desc_loss_total = 0.0
    self.att_loss = ScalarMeter(cfg.LOG_PERIOD)
    self.att_loss_total = 0.0
    self.lr = None

    # Current minibatch errors (smoothed over a window)
    self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
    self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
    self.mb_att_top1_err = ScalarMeter(cfg.LOG_PERIOD)
    self.mb_att_top5_err = ScalarMeter(cfg.LOG_PERIOD)

    # Number of misclassified examples, and samples seen
    self.num_top1_mis = self.num_top5_mis = 0
    self.num_att_top1_mis = self.num_att_top5_mis = 0
    self.num_samples = 0
class PomodoroController(object):
    """Drives a PomodoroProcess and starts a Timer for its current status."""

    def __init__(self):
        self._process = PomodoroProcess()

    def info_status(self):
        """Return the process's current status object."""
        return self._process.get_status()

    def _trigger_Timer(self):
        """Create, configure and start a Timer for the current status."""
        status = self._process.get_status()
        timer = Timer(status.get_name())
        self._Timer = timer
        timer.set_duration(status.get_duration())
        timer.start_timer(self.info_status())

    def start_pomodoro(self):
        """Start the process, then start a timer for its status."""
        self._process.start()
        self._trigger_Timer()

    def next_status(self):
        """Advance the process, then start a timer for the new status."""
        self._process.next_status()
        self._trigger_Timer()
def build_model(self, configs):
    """Add the configured layers to ``self.model`` and compile it.

    Layers are read from ``configs['model']['layers']``; each entry needs a
    ``'type'`` of ``'dense'``, ``'lstm'`` or ``'dropout'`` (other types add
    nothing). Optional keys default to None. The loss and optimizer come
    from ``configs['model']``. The build is wrapped in a Timer.
    """
    timer = Timer()
    timer.start()

    for layer in configs['model']['layers']:
        # Missing optional settings fall back to None.
        neurons = layer.get('neurons')
        dropout_rate = layer.get('rate')
        activation = layer.get('activation')
        return_seq = layer.get('return_seq')
        input_timesteps = layer.get('input_timesteps')
        input_dim = layer.get('input_dim')

        layer_type = layer['type']
        if layer_type == 'dense':
            self.model.add(Dense(neurons, activation=activation))
        elif layer_type == 'lstm':
            self.model.add(
                LSTM(neurons,
                     input_shape=(input_timesteps, input_dim),
                     return_sequences=return_seq))
        elif layer_type == 'dropout':
            self.model.add(Dropout(dropout_rate))

    self.model.compile(loss=configs['model']['loss'],
                       optimizer=configs['model']['optimizer'])

    print('[Model] Model Compiled')
    timer.stop()
def compute_time_loader(data_loader):
    """Time how long fetching batches from `data_loader` takes.

    The loader is shuffled with epoch 0 first. The iteration count is
    NUM_ITER + WARMUP_ITER, capped at ``len(data_loader)``, and the timer is
    reset when the warmup iteration index is reached.

    Returns:
        ``timer.average_time`` after the loop.
    """
    timer = Timer()
    loader.shuffle(data_loader, 0)
    batches = iter(data_loader)

    requested = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
    total_iter = min(requested, len(data_loader))

    for cur_iter in range(total_iter):
        # Drop the warmup measurements.
        if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
            timer.reset()
        timer.tic()
        next(batches)
        timer.toc()

    return timer.average_time
def __init__(self):
    """Load the two Keras models and wire inputs to muscle stimuli.

    The zipped input streams are stored in ``self.last_input``. On every
    tick divisible by ``self.muscle_tick_rate`` the latest input is turned
    into a prediction, then into stimuli, and pushed to
    ``self.output_muscle_stimuli``.
    """
    super().__init__()

    # read pre-trained models for predicting two different periodic functions.
    self.forward_model = keras.models.load_model("../model/sine.h5")
    self.backward_model = keras.models.load_model("../model/spiked.h5")

    def remember_input(latest):
        setattr(self, "last_input", latest)

    def is_muscle_tick(tick):
        return tick % self.muscle_tick_rate == 0

    def predict_from_last_input(_tick):
        return self.input_to_prediction(self.last_input)

    def to_stimuli(prediction):
        return self.prediction_to_stimuli(prediction)

    def emit_stimuli(stimuli):
        self.output_muscle_stimuli.on_next(stimuli)

    zipped_inputs = self.input_danger.pipe(
        ops.zip(self.input_opportunity, self.input_flip))
    zipped_inputs.subscribe(remember_input)

    stimuli_stream = Timer().ticks.pipe(
        ops.filter(is_muscle_tick),
        ops.map(predict_from_last_input),
        ops.map(to_stimuli))
    stimuli_stream.subscribe(emit_stimuli)
def __init__(self, demo_mode=False):
    """
    Sets up the environment and its periodic image feed.

    :param demo_mode: stored on the instance; when active, images are meant
        to be cycled in a set order instead of randomly, which is useful for
        plotting a demo graph and checking the agent's decisions.
    """
    self.demo_mode = demo_mode

    def is_image_tick(tick):
        return tick % self.image_tick_rate == 0

    def load_image(_tick):
        return self.read_random_image()

    def publish_image(img):
        self.visual_feedback.on_next(img)

    # set up visual feedback, by periodically providing random cat/dog image
    image_stream = Timer().ticks.pipe(
        ops.filter(is_image_tick),
        ops.map(load_image),
    )
    image_stream.subscribe(publish_image)
def compute_time_eval(model):
    """Computes precise model forward test time using dummy data.

    The model is put in eval mode and run on an all-zero batch on the GPU.
    The forward pass runs under ``torch.no_grad()`` so the measurement does
    not include autograd bookkeeping, which inference does not need. The
    timer is reset once ``cfg.PREC_TIME.WARMUP_ITER`` iterations have run.

    Returns:
        ``timer.average_time`` after the loop.
    """
    # Use eval mode
    model.eval()
    # Generate a dummy mini-batch and copy data to GPU
    im_size = cfg.TRAIN.IM_SIZE
    batch_size = int(cfg.TEST.BATCH_SIZE / cfg.NUM_GPUS)
    inputs = torch.zeros(batch_size, 3, im_size, im_size).cuda(non_blocking=False)
    # Compute precise forward pass time
    timer = Timer()
    total_iter = cfg.PREC_TIME.NUM_ITER + cfg.PREC_TIME.WARMUP_ITER
    with torch.no_grad():
        for cur_iter in range(total_iter):
            # Reset the timers after the warmup phase
            if cur_iter == cfg.PREC_TIME.WARMUP_ITER:
                timer.reset()
            # Forward; synchronize so the timer covers the GPU work
            timer.tic()
            model(inputs)
            torch.cuda.synchronize()
            timer.toc()
    return timer.average_time
def assess(funcs):
    """Run a sequence of ``(name, module)`` steps and return the Timer total.

    If a result for all steps but the last is present in RESULTS, its pool
    is restored and only the last step is run, adding its time to the stored
    one. Otherwise every step is run from scratch.
    """
    Timer.refresh()

    last_name, last_mod = funcs[-1]
    prev_key = ''.join([name for name, _ in funcs[:-1]])
    key = prev_key + last_name

    if prev_key not in RESULTS:
        Timer.enable()
        for name, module in funcs:
            getattr(module, name)()
        elapsed = Timer.time()
    else:
        l_time, pool = RESULTS[prev_key]
        restore(pool)
        Timer.enable()
        getattr(last_mod, last_name)()
        elapsed = l_time + Timer.time()

    pool = clearAll()
    # Caching of the new result is currently disabled:
    #RESULTS[key] = (elapsed, pool())
    return elapsed
def train(self, x, y, epochs, batch_size, save_dir):
    """Fit ``self.model`` on in-memory data and save it under `save_dir`.

    The file name is a timestamp plus the epoch count. Training uses an
    EarlyStopping callback and a best-only ModelCheckpoint, both monitoring
    ``val_loss``. The run is wrapped in a Timer.
    """
    timer = Timer()
    timer.start()
    print('[Model] Training Started')
    print('[Model] %s epochs, %s batch size' % (epochs, batch_size))

    stamp = dt.datetime.now().strftime('%d%m%Y-%H%M%S')
    save_fname = os.path.join(save_dir, '%s-e%s.h5' % (stamp, str(epochs)))

    early_stop = EarlyStopping(monitor='val_loss', patience=2)
    checkpoint = ModelCheckpoint(filepath=save_fname,
                                 monitor='val_loss',
                                 save_best_only=True)
    callbacks = [early_stop, checkpoint]

    self.model.fit(x,
                   y,
                   epochs=epochs,
                   batch_size=batch_size,
                   callbacks=callbacks)
    self.model.save(save_fname)

    print('[Model] Training Completed. Model saved as %s' % save_fname)
    timer.stop()
def train_generator(self, data_gen, epochs, batch_size, steps_per_epoch,
                    save_dir):
    """Fit ``self.model`` from a generator, checkpointing under `save_dir`.

    The checkpoint file name is a timestamp plus the epoch count. The only
    callback is a best-only ModelCheckpoint monitoring ``loss``; this method
    does not call ``model.save`` itself. `batch_size` is used only in the
    progress message. The run is wrapped in a Timer.
    """
    timer = Timer()
    timer.start()
    print('[Model] Training Started')
    print('[Model] %s epochs, %s batch size, %s batches per epoch' %
          (epochs, batch_size, steps_per_epoch))

    stamp = dt.datetime.now().strftime('%d%m%Y-%H%M%S')
    save_fname = os.path.join(save_dir, '%s-e%s.h5' % (stamp, str(epochs)))

    checkpoint = ModelCheckpoint(filepath=save_fname,
                                 monitor='loss',
                                 save_best_only=True)
    callbacks = [checkpoint]

    self.model.fit(data_gen,
                   steps_per_epoch=steps_per_epoch,
                   epochs=epochs,
                   callbacks=callbacks,
                   workers=1)

    print('[Model] Training Completed. Model saved as %s' % save_fname)
    timer.stop()
class WeaklySupervisedMemoryNet(Layer):
    """
    An implementation of weakly supervised memory network paper.

    Facts and the question are embedded with bag-of-words layers; for
    ``n_steps`` hops the question state attends over the input memories and
    is updated with the attended output memories, then an affine layer and a
    softmax produce the answer probabilities.
    """

    def __init__(self,
                 n_in,
                 n_out,
                 bow_size,
                 weight_initializer=None,
                 use_index_jittering=False,
                 bias_initializer=None,
                 max_fact_len=12,
                 max_seq_len=250,
                 dropout=None,
                 batch_size=None,
                 learning_rule=None,
                 share_inp_out_weights=False,
                 n_steps=1,
                 inps=None,
                 use_noise=False,
                 theano_function_mode=None,
                 rng=None,
                 name=None):
        """Store the hyper-parameters; layers are built later by init_params."""
        self.n_in = n_in
        self.n_out = n_out
        self.bow_size = bow_size
        self.use_index_jittering = use_index_jittering
        self.weight_initializer = weight_initializer
        self.bias_initializer = bias_initializer
        self.share_inp_out_weights = share_inp_out_weights
        self.rng = rng
        self.inps = inps
        self.dropout = dropout
        self.batch_size = batch_size
        self.learning_rule = learning_rule
        self.theano_function_mode = theano_function_mode
        self.eps = 1e-7
        self.max_fact_len = max_fact_len
        self.max_seq_len = max_seq_len
        self.n_steps = n_steps
        self.use_noise = use_noise
        self.name = name
        assert n_steps > 0, "Illegal value has been provided for n_steps."

        self.train_timer = Timer("Training function")
        self.grads_timer = Timer("Computing the grads")
        self.updates = {}

    def _new_bow_layer(self, layer_name):
        """Build one BOWLayer with this network's shared settings."""
        return BOWLayer(n_in=self.n_in,
                        n_out=self.bow_size,
                        seq_len=self.max_fact_len,
                        use_inv_cost_mask=False,
                        weight_initializer=self.weight_initializer,
                        bias_initializer=self.bias_initializer,
                        use_average=False,
                        name=self.pname(layer_name))

    def init_params(self, use_noise=False, mdl_name=None):
        """Create the child layers and temporal-encoding parameters once.

        Nothing is rebuilt when ``self.children`` is already populated. If
        `mdl_name` is given, parameters are reloaded from it and handed to
        every child.
        """
        if not hasattr(self, "children") or not self.children:
            self.children = []
            self.inp_bow_layer = self._new_bow_layer("bow_layer")
            self.inp_bow_layers = [self.inp_bow_layer]

            self.out_bow_layer = self._new_bow_layer("out_bow_layer")
            self.out_bow_layers = [self.out_bow_layer]

            # Without weight sharing every further hop gets its own pair.
            if not self.share_inp_out_weights:
                for i in xrange(1, self.n_steps):
                    self.inp_bow_layers += [
                        self._new_bow_layer("bow_layer_" + str(i))]
                    self.out_bow_layers += [
                        self._new_bow_layer("out_bow_layer_" + str(i))]

            self.q_embed = self._new_bow_layer("q_embed")

            self.out_layer = AffineLayer(n_in=self.bow_size,
                                         n_out=self.n_out,
                                         weight_initializer=self.weight_initializer,
                                         bias_initializer=self.bias_initializer,
                                         name=self.pname("out_layer"))

            self.children.extend(self.inp_bow_layers)
            self.children.extend(self.out_bow_layers)
            self.children.append(self.out_layer)
            self.children.append(self.q_embed)
            self.merge_params()

            # These are the parameters for the temporal encoding thing:
            self.T_ins = []
            self.T_outs = []

            nsteps = 1 if self.share_inp_out_weights else self.n_steps
            for i in xrange(nsteps):
                T_in = self.weight_initializer(self.max_seq_len, self.bow_size)
                self.params[self.pname("TE_in_%d" % i)] = T_in
                self.T_ins.append(self.params[self.pname("TE_in_%d" % i)])
                T_out = self.weight_initializer(self.max_seq_len, self.bow_size)
                self.params[self.pname("TE_out_%d" % i)] = T_out
                self.T_outs.append(self.params[self.pname("TE_out_%d" % i)])

        if mdl_name:
            logger.info("Reloading model from %s." % mdl_name)
            self.params.load(mdl_name)
            for child in self.children:
                child.use_params(self.params)

    def get_cost(self, use_noise=False, mdl_name=None):
        """Build the NLL cost and errors from ``self.inps`` (X, q, y, mask)."""
        X = self.inps[0]
        q = self.inps[1]
        y = self.inps[2]
        mask = self.inps[3]
        cmask = None
        probs = self.fprop(X, q, cmask=cmask,
                           mask=mask, use_noise=use_noise,
                           mdl_name=mdl_name)
        self.cost, self.errors = nll(y, probs)
        return self.cost, self.errors

    def get_inspect_fn(self, mdl_name=None):
        """Compile a function that returns intermediate outputs and probs."""
        # NOTE(review): fprop() requires `x` and `q` and returns only `probs`,
        # so this call cannot succeed as written -- confirm intended usage.
        logger.info("Compiling inspect function.")
        probs, ntm_outs = self.fprop(use_noise=False, mdl_name=mdl_name)

        inspect_fn = theano.function([self.inps[0],
                                      self.inps[1],
                                      self.inps[2],
                                      self.inps[3]],
                                     ntm_outs + [probs],
                                     on_unused_input='ignore',
                                     name=self.pname("inspect_fn"))

        return inspect_fn

    def get_valid_fn(self, mdl_name=None):
        """Compile a function returning ``[cost, errors]`` without noise."""
        logger.info("Compiling validation function.")
        self.cost, self.errors = self.get_cost(use_noise=False,
                                               mdl_name=mdl_name)

        valid_fn = theano.function(self.inps,
                                   [self.cost, self.errors],
                                   on_unused_input='ignore',
                                   name=self.pname("valid_fn"))

        return valid_fn

    def add_noise_to_params(self):
        """Add N(0, 0.05) noise from ``global_rng`` to every parameter."""
        for k, v in self.params.__dict__['params'].iteritems():
            v_np = v.get_value(borrow=True)
            noise = global_rng.normal(0, 0.05, v_np.shape)
            self.params[k] = v_np + noise

    def get_train_fn(self, lr=None, mdl_name=None):
        """Compile the training function.

        `lr` defaults to ``self.eps``. The compiled function returns
        ``[cost, gnorm, norm_up, param_norm, errors]`` and applies the
        learning rule's updates, which are also kept in ``self.updates``.
        """
        if lr is None:
            lr = self.eps

        cost, errors = self.get_cost(use_noise=self.use_noise,
                                     mdl_name=mdl_name)

        params = self.params.values
        logger.info("Computing the gradients.")
        self.grads_timer.start()
        grads = safe_grad(cost, params)
        gnorm = sum(grad.norm(2) for _, grad in grads.iteritems())

        updates, norm_up, param_norm = self.learning_rule.get_updates(
            learning_rate=lr, grads=grads)

        self.grads_timer.stop()
        logger.info(self.grads_timer)

        # Fixed: this used to assign the result of dict.update(), which is
        # always None, back onto self.updates.
        if not self.updates:
            self.updates.update(updates)

        logger.info("Compiling the training function.")
        self.train_timer.start()
        self.updates = updates
        outs = [self.cost, gnorm, norm_up, param_norm]
        outs += [self.errors]

        train_fn = theano.function(self.inps,
                                   outs,
                                   updates=updates,
                                   mode=self.theano_function_mode,
                                   on_unused_input='ignore',
                                   name=self.pname("train_fn"))

        self.train_timer.stop()
        logger.info(self.train_timer)
        return train_fn

    def __get_bow_inps(self, x, q, mask=None, use_noise=False):
        """Run `x` through the input and output BOW layers of each hop."""
        inp_bow_outs, out_bow_outs = [], []
        nsteps = 1 if self.share_inp_out_weights else self.n_steps
        for i in xrange(nsteps):
            inp_bow_outs.append(self.inp_bow_layers[i].fprop(
                x, amask=mask, deterministic=not use_noise))
            out_bow_outs.append(self.out_bow_layers[i].fprop(
                x, amask=mask, deterministic=not use_noise))
        return inp_bow_outs, out_bow_outs

    def dot_componentwise(self, x, u_t):
        """Multiply `x` by `u_t` elementwise and sum over the last axis."""
        if x.ndim == 3:
            u_t = u_t.dimshuffle('x', 0, 1)
        res = (x * u_t).sum(-1)
        return res

    def fprop(self, x, q, mask=None, qmask=None, cmask=None,
              use_noise=False, mdl_name=None):
        """Build the symbolic forward pass and return the answer probabilities.

        NOTE(review): `mask` defaults to None but is dereferenced
        unconditionally below, so callers must pass it. `qmask` is unused.
        """
        self.init_params(use_noise=use_noise, mdl_name=mdl_name)
        q_emb = self.q_embed.fprop(q, deterministic=not use_noise)
        amask = None

        if mask is not None and cmask is not None:
            amask = mask * TT.eq(cmask, 0)

        inp_bow_outs, out_bow_outs = self.__get_bow_inps(x, q,
                                                         mask=amask,
                                                         use_noise=use_noise)
        u_t = q_emb
        v_t = None

        if mask.ndim == 2 and \
                inp_bow_outs[0].ndim == 3:
            mask = mask.dimshuffle(0, 1, 'x')

        for i in xrange(self.n_steps):
            # With shared weights every hop reuses layer/encoding 0.
            if not self.share_inp_out_weights:
                inp_bow = mask * (inp_bow_outs[i] +
                                  self.T_ins[i].dimshuffle(0, 'x', 1))
                out_bow = mask * (out_bow_outs[i] +
                                  self.T_outs[i].dimshuffle(0, 'x', 1))
            else:
                inp_bow = mask * (inp_bow_outs[0] +
                                  self.T_ins[0].dimshuffle(0, 'x', 1))
                out_bow = mask * (out_bow_outs[0] +
                                  self.T_outs[0].dimshuffle(0, 'x', 1))

            if u_t.ndim == 2:
                u_t = u_t.dimshuffle(0, 1, 'x')

            # Masked softmax over axis 0 of the similarities.
            sims = self.dot_componentwise(inp_bow, u_t)
            pre_soft = mask.dimshuffle(0, 1) * TT.exp(sims - sims.max(0))
            ps = pre_soft / pre_soft.sum(axis=0, keepdims=True)
            ps = ps.dimshuffle(0, 1, 'x')
            v_t = (out_bow * ps).sum(0)
            u_t = u_t.dimshuffle(0, 1) + v_t

        new_out = u_t
        pre_logit = self.out_layer.fprop(new_out)
        probs = Softmax(pre_logit)
        return probs
class Logging:
    """Used to log data streams to a CSV file"""

    file = None
    timer = Timer()
    # A list of Loggers
    # NOTE(review): this list lives on the class, so every Logging instance
    # appends to the same one -- confirm that sharing is intended.
    loggers = []

    def __init__(self, file_path="../log/logfile.txt", flush_rate=10):
        """
        Initializes the Logging class
        :param file_path: file path of csv file to log to
        :param flush_rate: rate in ticks after which the logger should write to the csv file
        """
        self.file_path = file_path
        self.flush_rate = flush_rate

    class Logger:
        """Holds an observable of logged values."""
        name = ""  # descriptive label for logged values
        last_val = ""  # last logged value
        observable = None  # stream of logged values

        def __init__(self, name, obs):
            """Remember the label and stream, and track the stream's latest value."""
            self.name = name
            self.observable = obs
            obs.subscribe(lambda x: setattr(self, "last_val", x))

    def on_tick(self, tick):
        """Write one line for this tick and flush every `flush_rate` ticks."""
        self.write_line(tick)
        if tick % self.flush_rate == 0:
            self.file.flush()

    def start_logging(self):
        """Start logging to console and file"""
        self.file = open(self.file_path, "w")
        self.write_header()
        self.timer.ticks.subscribe(lambda x: self.on_tick(x))

    def add_logger(self, logger):
        """Add a logger. Imagine it as another column in the CSV file."""
        self.loggers.append(logger)

    def write_header(self):
        """Write the header of the CSV file, based on the Loggers' names"""
        # Fixed: "tick" used to be glued onto the first logger name with no
        # separator ("ticka;b"), so the header did not line up with the
        # "<tick>;<values>" rows written by write_line().
        header = ';'.join(["tick"] + [logger.name for logger in self.loggers])
        self.file.write(header + "\n")
        print(header)

    def write_line(self, tick):
        """Write a single line to the log CSV file."""
        line = f"{tick};"
        line += ';'.join(map(lambda x: str(x.last_val), self.loggers))

        # reset last value
        for logger in self.loggers:
            logger.last_val = ""

        self.file.write(line + "\n")
        print(line)
def _trigger_Timer(self):
    """Create, configure and start a Timer for the process's current status."""
    status = self._process.get_status()
    timer = Timer(status.get_name())
    self._Timer = timer
    timer.set_duration(status.get_duration())
    timer.start_timer(self.info_status())
def end(self, success: bool, exception=None):
    """Record end time, elapsed time and outcome on ``self.info``.

    A truthy `exception` is additionally stored, as a string, in
    ``self.exception_str``.
    """
    info = self.info
    info.end_time = str(datetime.datetime.now())
    info.elapsed_time = str(Timer.get_elapsed_time())
    info.success = success

    if not exception:
        return
    self.exception_str = str(exception)
ldap().modify( verifyServerCertificate = True ) ldap().modify( verifyServerCertificate = False ) ldap().modify( is_error = True, verifyServerCertificate = True ) def t_ldap_upload_content_cacert(): nas = getNasServer() getInterface(nas) ldap = getLdap(nas, protocol = "LDAPS", verifyServerCertificate = False) nas().upload() if __name__ == '__main__': NTP.create(SYSTEM) Timer.enable() t_ldap_simple() t_ldap_kerberos_credentials() t_ldap_kerberos() t_ldap_kerberos_use_cifs() t_ldap_cifs_using_other_account() t_ldap_auto_switch() t_ldap_dep_errors() t_ldap_basedn() t_ldap_profiledn() t_ldap_binddn() t_ldap_bind_password() t_ldap_ip_addresses() t_ldap_warnings() t_ldap_download_upload_conf() t_ldap_download_upload_cacert()
class Kernel:
    """Emulated kernel: owns the timer, scheduler, RAM, process list and CPUs."""

    def __init__(self):
        self.timer = Timer()
        self.scheduler = Scheduler()
        self.memory = RAM()
        self.processes = []
        self.cpus = [CPU(1)]

    # print emulated specs
    def bootUp(self):
        """Print the core count, RAM size, clock, quantum and page size."""
        print(f'Total CPU cores: {len(self.cpus)}')
        print(f'Total RAM: {len(self.memory.memory)/2**20} MB')
        print(f'Clock: {self.timer.clock} Hz')
        print(f'Quantum: {self.timer.quantum} ms')
        print(f'Page and frame size: {self.memory.mmu.pageSize/2**10} KB\n')

    # read processes from processes.txt
    def loadProcesses(self):
        """Append one Process per line of ``processes.txt``.

        Each line holds four comma-separated integers. If the file cannot be
        opened a message is printed and ``self.processes`` is left as is.
        """
        # Fixed: there used to be a `finally: f.close()` here. When open()
        # failed, `f` was never bound, so the handled IOError turned into an
        # UnboundLocalError. The `with` block already closes the file.
        try:
            with open('processes.txt', 'r') as f:
                for line in f:
                    data = line.strip().split(',')
                    self.processes.append(
                        Process(int(data[0]), int(data[1]),
                                int(data[2]), int(data[3])))
        except IOError:
            print("File doesn't exist")

    # emulates scheduling and allocation of memory for a processes based on a scheduling algorithm
    def schedulerRun(self):
        """Run the simulation until the ready queue is empty.

        Returns:
            The value of ``stats.finalStats()``, which is also printed.
        """
        stats = Stats()

        while 1:
            self.timer.setInitTime()

            # checks for new processes to add to the ready queue and allocates memory on arrival
            for process in self.processes:
                if process.arrivalTime == self.timer.ticks:
                    self.scheduler.addProcess(process)
                    stats.output(self.timer, process, 'added')
                    for address in process.memAddresses:
                        self.memory.mmu.writeMemory(process, address, randint(0,256))

            # sorts ready queue (non preemptive algorithms only)
            self.scheduler.sortProcesses()

            # simulate multiprocessing by using multiple cpus (if set)
            for cpu in self.cpus:
                try:
                    if cpu.currentProcess is None:
                        # if cpu not running a process fetch the next available one
                        cpu.fetchProcess(self.scheduler.getAvailableProcess())
                        stats.output(self.timer, cpu.currentProcess, 'working', cpu.id)
                    else:
                        # else if already assigned a process increase its runtime
                        cpu.runProcess()
                except IndexError:
                    # if ready queue empty go to next cpu
                    continue

                # handles finished process
                if cpu.currentProcess.isFinished():
                    stats.turnaroundTimes.append(
                        self.timer.ticks - cpu.currentProcess.arrivalTime)
                    stats.waitingTimes.append(
                        stats.turnaroundTimes[-1] - cpu.currentProcess.burstTime)
                    stats.output(self.timer, cpu.currentProcess, 'finished')

                    # deallocate process' pages, remove process from cpu and readyqueue
                    for page in list(cpu.currentProcess.pageTable):
                        self.memory.mmu.deallocatePage(cpu.currentProcess, page)
                    self.scheduler.removeProcess(cpu.currentProcess)
                    cpu.freeCPU()

                    # fetch next process
                    try:
                        self.scheduler.sortProcesses()
                        cpu.fetchProcess(self.scheduler.getAvailableProcess())
                        stats.output(self.timer, cpu.currentProcess, 'working', cpu.id)
                        self.timer.resetQuantum()
                    except IndexError:
                        # if ready queue is empty go to next cpu
                        continue

                # handles a process time slice running out for preemptive algorithms
                # frees the cpu and releases the process, reorders the ready queue and fetches the next available process
                if (self.scheduler.strategy.name == 'RR'
                        and self.timer.currentQuantum == self.timer.quantum):
                    cpu.freeCPU()
                    self.scheduler.strategy.sortProcesses()
                    cpu.fetchProcess(self.scheduler.getAvailableProcess())
                    stats.output(self.timer, cpu.currentProcess, 'working', cpu.id)
                    self.timer.resetQuantum()

                # handles process accessing cpu for the first time
                if (cpu.currentProcess.runTime == 0):
                    stats.responseTimes.append(
                        self.timer.ticks - cpu.currentProcess.arrivalTime)
                    stats.output(self.timer, cpu.currentProcess, 'accessed')

            # when all cpu are finished, go to next millisecond
            self.timer.sleep()
            self.timer.tick()

            # when ready queue is empty end the simulation
            if(len(self.scheduler.readyQueue) == 0):
                break

        results = stats.finalStats()
        print('\n' + stats.finalStats() + '\n')
        self.varReset()
        return results

    # reset variables every simulation
    def varReset(self):
        """Clear the process list, timer counters and MMU swap/history."""
        self.processes = []
        self.timer.ticks=self.timer.currentQuantum = 0
        self.memory.mmu.swap = {}
        self.memory.mmu.history = []

    # set number of cores to use for simulation
    def setCpus(self, n):
        """Replace ``self.cpus`` with `n` CPUs numbered 1..n."""
        self.cpus = []
        for i in range(1, n+1):
            self.cpus.append(CPU(i))
def apply(self, node = None, is_error = False, **kwargs):
    """Apply this rule to `node` and return the terminal node(s) it produced.

    'list' rules are delegated to apply_list(). With `is_error` set, only
    ``self.error_time`` is charged to the Timer and None is returned.
    Otherwise ``self.time`` is charged, and if the rule is applicable the
    difference between copies of its left and right sides is merged into
    the tree under `node`.

    Returns:
        The single new node when exactly one was produced (after updating
        its attributes from `kwargs`), otherwise the list of new nodes;
        None when the rule was not applied.
    """
    if self.method == 'list':
        return self.apply_list(**kwargs)
    if is_error:
        Timer.add(self.error_time)
        return None

    Timer.add(self.time)
    if node is None or self.left is None:
        return None

    # Accept either a node or a weak reference to one.
    if type(node) is weakref.ReferenceType:
        node = node()

    # Remember the identity of the node the caller passed in before `node`
    # is replaced by the result of __find_left_base().
    node_id = None
    node_name = node.name
    if hasattr(node, 'id'):
        node_id = node.id

    node = self.__find_left_base(node)
    if self.is_applicable(node, node_name, node_id):
        # create new
        left_node = self.copy_left()
        right_node = self.copy_right()

        # merge
        def merge_diff_rec(left, right, orig, passed_node_id, passed_node_name):
            # Walks `left`/`right` in step with `orig`, applies their
            # differences to `orig`, and returns what __find_terminal()
            # yields for the nodes that were added or substituted.
            result = []
            # some nodes are added in rule
            if len(left.children) < len(right.children):
                # find these nodes
                for right_child in right.children:
                    found = False
                    for left_child in left.children:
                        if left_child == right_child:
                            found = True
                            break
                    if not found:
                        orig.add(right_child)
                        result += self.__find_terminal(right_child)
            # some nodes are deleted in rule
            elif len(left.children) > len(right.children):
                for left_child in left.children:
                    found = False
                    # try to find the same symbol in right side of rule
                    for right_child in right.children:
                        if left_child == right_child:
                            found = True
                            break
                    # if such symbol is not found it means that this symbol should be deleted
                    if not found:
                        # find and remove such symbol
                        for inx in range(len(orig.children)):
                            orig_child = orig.children[inx]
                            if self.__ndeep_eq(left_child, orig_child) and orig_child.id == passed_node_id:
                                orig.children.pop(inx)
                                self.__delete_terminal_node(orig_child)
                                break
            # if nodes are the same
            elif len(left.children) == len(right.children):
                orig_children = []
                left_inx, right_inx, orig_inx = 0, 0, 0
                # modify case
                if len(left.children) == len(right.children) == 0 and \
                   passed_node_id and orig.terminal and orig.id == passed_node_id:
                    orig.__dict__.update(**kwargs)

                # no need id if name is None, but it's not set to None before, because
                # it need for checking modify case
                if passed_node_name is None:
                    passed_node_id = None

                while left_inx < len(left.children):
                    left_child = left.children[left_inx]
                    right_child = right.children[right_inx]
                    orig_child = orig.children[orig_inx]

                    # Advance through orig's children to the one that
                    # corresponds to the current left child.
                    # NOTE(review): the index is bumped and used to read
                    # orig.children before the loop condition is re-checked,
                    # so running off the end looks like it raises IndexError
                    # before the length check below -- confirm.
                    while orig_inx < len(orig.children):
                        cmp_result = Rule.__ndeep_eq(left_child, orig_child)
                        dfs_result = Rule.__dfs_by_id(orig_child, passed_node_name, passed_node_id)
                        in_context = (orig_child.name, orig_child.terminal) in self.context
                        if cmp_result and \
                           (passed_node_id is None or \
                            not passed_node_id is None and (dfs_result or in_context)):
                            break
                        orig_inx += 1
                        orig_child = orig.children[orig_inx]

                    if len(orig.children) == orig_inx:
                        break

                    if left_child != right_child:
                        orig.children[orig_inx] = None
                        self.__delete_terminal_node(orig_child)
                        # substitute the node from right tree
                        orig.children[orig_inx] = right_child
                        orig_child = right_child
                        right_child.parent = orig
                        if not left_child.terminal:
                            result += self.__find_terminal(orig_child)

                    orig_children.append(orig_child)
                    left_inx += 1
                    right_inx += 1
                    orig_inx += 1

                # We don't need the id below the objects with pointed name,
                # because there is no such nodes and hence there is no such id.
                if orig.name == passed_node_name:
                    passed_node_name = None

                # Recurse into each matched (left, right, orig) child triple.
                for left_child, right_child, orig_child in zip(left.children, right.children, orig_children):
                    result.extend(merge_diff_rec(left_child, right_child, orig_child, passed_node_id, passed_node_name))
            return result

        result = merge_diff_rec(left_node, right_node, node, node_id, node_name)

        # Entries of `result` are called to obtain the node, then registered
        # in OBJECTS under their name and id.
        for n in result:
            n().handle_id()
            n().type = self.type
            OBJECTS[n().name][n().id] = n
            # That is a case when diamond inheritance is applied
            if n().name != n().type:
                OBJECTS[n().type][n().id] = n

        if len(result) == 1:
            result[0]().__dict__.update(**kwargs)
            return result[0]
        else:
            return result
    return None
class WeaklySupervisedMemoryNetwork(Model):
    """
    This is a class for weakly supervised memory network.

    NOTE(review): ``fprop`` uses ``self.grulow_layer``,
    ``self.low_gru_layer``, ``self.gruup_layer`` and ``self.gru_layer``,
    which ``reset`` sets to ``None`` and ``build_model`` never creates, and
    ``build_model`` reads ``self.emb_size``, which this class never
    assigns.  Confirm whether they are provided elsewhere (e.g. by
    ``Model``) before relying on this class.
    """

    def __init__(self,
                 n_in,
                 n_hids,
                 low_gru_size,
                 n_out,
                 inps=None,
                 n_layers=None,
                 dropout=None,
                 seq_len=None,
                 learning_rule=None,
                 weight_initializer=None,
                 bias_initializer=None,
                 activ=None,
                 use_cost_mask=True,
                 noise=False,
                 use_hint_layer=False,
                 use_average=False,
                 theano_function_mode=None,
                 use_positional_encoding=False,
                 use_inv_cost_mask=False,
                 batch_size=32,
                 use_noise=False,
                 name=None):
        """Store the hyper-parameters and reset the model state.

        ``use_hint_layer`` is accepted for interface compatibility but is
        not stored or used by this class.

        Raises:
            ValueError: if ``name`` is None.
        """
        self.n_in = n_in
        self.n_hids = n_hids
        self.n_out = n_out
        self.low_gru_size = low_gru_size
        self.n_layers = n_layers
        self.inps = inps
        self.noise = noise
        self.seq_len = seq_len
        self.use_cost_mask = use_cost_mask
        # Was the typo ``selfearning_rule = ...``, which only bound a
        # local and left ``self.learning_rule`` undefined.
        self.learning_rule = learning_rule
        self.dropout = dropout
        self.use_average = use_average
        self.batch_size = batch_size
        self.use_noise = use_noise
        self.train_timer = Timer("Training function")
        self.grads_timer = Timer("Computing the grads")
        self.theano_function_mode = theano_function_mode
        self.weight_initializer = weight_initializer
        self.bias_initializer = bias_initializer
        self.use_positional_encoding = use_positional_encoding
        self.use_inv_cost_mask = use_inv_cost_mask
        self.eps = 1e-8
        self.activ = activ
        self.out_layer_in = self.n_hids
        if name is None:
            raise ValueError("name should not be empty.")
        self.reset()
        self.name = name

    def reset(self):
        """Drop all layers, parameters and pending updates."""
        self.children = []
        self.params = Parameters()
        self.grulow_layer = None
        self.low_gru_layer = None
        self.gruup_layer = None
        self.gru_layer = None
        self.out_layer = None
        self.hint_layer = None
        self.bow_input = None
        self.bow_output = None
        self.updates = OrderedDict({})

    def _new_bow_layer(self, layer_name):
        """Build one BOW embedding layer named ``self.pname(layer_name)``."""
        # NOTE(review): ``self.emb_size`` is not assigned anywhere in this
        # class -- confirm where it comes from.
        return BOWLayer(n_in=self.n_in,
                        n_out=self.emb_size,
                        noise=self.noise,
                        weight_initializer=self.weight_initializer,
                        bias_initializer=self.bias_initializer,
                        seq_len=self.seq_len,
                        name=self.pname(layer_name))

    def build_model(self, use_noise=False, mdl_name=None):
        """Create the layers, register them as children and merge params.

        The three BOW layers are rebuilt on every call (as before); the
        output layer and the children list are only created once.  When
        ``mdl_name`` is given, parameters are reloaded from it and pushed
        to every child.
        """
        self.bowin_layer = self._new_bow_layer("bowin_layer")
        self.bowout_layer = self._new_bow_layer("bowout_layer")
        self.qembed_layer = self._new_bow_layer("qembed_layer")

        if not self.out_layer:
            self.out_layer = AffineLayer(
                n_in=self.out_layer_in,
                n_out=self.n_out,
                noise=self.noise,
                weight_initializer=self.weight_initializer,
                bias_initializer=self.bias_initializer,
                name=self.pname("out_layer"))

        if not self.children:
            self.children.extend([self.bowin_layer,
                                  self.bowout_layer,
                                  self.qembed_layer,
                                  self.out_layer])
            self.merge_params()

        if mdl_name:
            logger.info("Reloading the model from %s. " % mdl_name)
            self.params.load(mdl_name)
            for child in self.children:
                child.use_params(self.params)

    def get_cost(self, use_noise=False, mdl_name=None):
        """Build the NLL cost and error expressions.

        Returns:
            The tuple ``(self.cost, self.errors)`` produced by ``nll``.
        """
        probs, _ = self.fprop(use_noise=use_noise, mdl_name=mdl_name)
        y = self.inps[1]
        cmask = None
        if self.use_cost_mask:
            # NOTE(review): fprop reads the cost mask from inps[4] while
            # this reads inps[3] -- confirm which index is intended.
            cmask = self.inps[3]
        self.cost, self.errors = nll(y, probs, cost_mask=cmask)
        return self.cost, self.errors

    def get_train_fn(self, lr=None, mdl_name=None):
        """Compile and return the Theano training function.

        Args:
            lr: learning rate; defaults to ``self.eps`` when None.
            mdl_name: optional path of saved parameters to reload.
        """
        if lr is None:
            lr = self.eps

        cost, _ = self.get_cost(use_noise=self.use_noise, mdl_name=mdl_name)
        params = self.params.values

        logger.info("Computing the gradient graph.")
        self.grads_timer.start()
        grads = safe_grad(cost, params)
        gnorm = sum(grad.norm(2) for grad in grads.values())
        updates, norm_up, param_norm = self.learning_rule.get_updates(
            learning_rate=lr, grads=grads)
        self.grads_timer.stop()
        logger.info(self.grads_timer)

        if not self.updates:
            # dict.update() works in place and returns None; assigning its
            # result used to replace self.updates with None.
            self.updates.update(updates)

        outs = [self.cost, gnorm, norm_up, param_norm, self.errors]

        # The timer was previously stopped without ever being started.
        self.train_timer.start()
        train_fn = theano.function(self.inps,
                                   outs,
                                   updates=updates,
                                   mode=self.theano_function_mode,
                                   name=self.pname("train_fn"))
        self.train_timer.stop()
        logger.info(self.train_timer)
        return train_fn

    def get_inspect_fn(self, mdl_name=None):
        """Compile a function returning ``[h, probs]`` for inps[0], inps[2]."""
        logger.info("Compiling inspect function.")
        probs, h = self.fprop(use_noise=False, mdl_name=mdl_name)
        inspect_fn = theano.function([self.inps[0], self.inps[2]],
                                     [h, probs],
                                     name=self.pname("inspect_fn"))
        return inspect_fn

    def get_valid_fn(self, mdl_name=None):
        """Compile a noise-free function returning ``[cost, errors]``."""
        logger.info("Compiling validation function.")
        self.cost, self.errors = self.get_cost(use_noise=False,
                                               mdl_name=mdl_name)
        valid_fn = theano.function(self.inps,
                                   [self.cost, self.errors],
                                   name=self.pname("valid_fn"))
        return valid_fn

    def fprop(self, inps=None, use_mask=True, use_cmask=True,
              use_noise=False, mdl_name=None):
        """Build the forward graph.

        Returns:
            ``(self.probs, h1)``: the softmax output and the upper GRU state.

        Raises:
            ValueError: if no cost mask is available (``use_cmask`` false).
        """
        self.build_model(use_noise=use_noise, mdl_name=mdl_name)

        if not inps:
            inps = self.inps

        X = inps[0]

        # Bind the masks up front so that a disabled mask reaches the
        # explicit ValueError below instead of raising NameError.
        mask = qmask = cmask = None
        if use_mask:
            mask = inps[2]
            qmask = inps[3]
        if use_cmask:
            cmask = inps[4]

        assert (3 + sum([use_mask, use_cmask])) == len(inps), \
            "inputs have illegal shape."

        m0 = as_floatX(TT.gt(X, 0))
        if cmask is None:
            raise ValueError("Mask for the answers should not be empty.")
        m1 = mask * TT.eq(cmask, 0)

        dropOp = None
        low_inp_shp = (X.shape[0], X.shape[1] * X.shape[2], -1)
        Xr = X.reshape(low_inp_shp)
        grulow_inps = self.grulow_layer.fprop(Xr, deterministic=not use_noise)

        # NOTE(review): low_reset_below / low_gater_below / low_state_below
        # are not defined in this method; they presumably should be taken
        # from ``grulow_inps`` the way the upper-GRU inputs are taken from
        # ``gruup_inps`` below -- confirm and fix.
        linps = [low_reset_below, low_gater_below, low_state_below]

        inp_shp = (X.shape[1], X.shape[2], -1)
        h0 = self.low_gru_layer.fprop(inps=linps,
                                      mask=m0,
                                      batch_size=self.batch_size)
        h0 = m1.dimshuffle(0, 1, 'x') * (h0.reshape(
            (X.shape[0], X.shape[1], X.shape[2], -1))[-1]).reshape(inp_shp)

        if self.dropout:
            if dropOp is None:
                dropOp = Dropout(dropout_prob=self.dropout)
            h0 = dropOp(h0, deterministic=not use_noise)

        gruup_inps = self.gruup_layer.fprop(h0, deterministic=not use_noise)
        # list(...) keeps positional access working when values() is a view.
        gruup_vals = list(gruup_inps.values())
        reset_below = gruup_vals[0].reshape(inp_shp)
        gater_below = gruup_vals[1].reshape(inp_shp)
        state_below = gruup_vals[2].reshape(inp_shp)
        uinps = [reset_below, gater_below, state_below]

        h1, _ = self.gru_layer.fprop(inps=uinps,
                                     maskf=m1,
                                     maskq=qmask,
                                     batch_size=self.batch_size)

        if self.dropout:
            if dropOp is None:
                dropOp = Dropout(dropout_prob=self.dropout)
            h1 = dropOp(h1, deterministic=not use_noise)

        out_layer = self.out_layer.fprop(h1, deterministic=not use_noise)
        self.probs = Softmax(out_layer)
        return self.probs, h1
class LSTMModel(Model):
    """LSTM model fed by a bag-of-words embedding of the input."""

    def __init__(self,
                 n_in,
                 n_hids,
                 bow_size,
                 n_out,
                 inps=None,
                 dropout=None,
                 seq_len=None,
                 learning_rule=None,
                 weight_initializer=None,
                 bias_initializer=None,
                 learn_h0=False,
                 deepout=None,
                 activ=None,
                 use_cost_mask=True,
                 noise=False,
                 use_hint_layer=False,
                 use_average=False,
                 theano_function_mode=None,
                 use_positional_encoding=False,
                 use_inv_cost_mask=False,
                 batch_size=32,
                 use_noise=False,
                 name=None):
        """Store the hyper-parameters and reset the model state.

        ``use_hint_layer`` is accepted for interface compatibility but is
        not stored or used by this class.

        Raises:
            ValueError: if ``bow_size`` or ``name`` is None.
        """
        self.n_in = n_in
        self.n_hids = n_hids
        self.n_out = n_out
        self.bow_size = bow_size
        self.inps = inps
        self.noise = noise
        self.seq_len = seq_len
        self.dropout = dropout
        self.use_cost_mask = use_cost_mask
        self.learning_rule = learning_rule
        self.bias_initializer = bias_initializer
        self.learn_h0 = learn_h0
        self.use_average = use_average
        self.deepout = deepout
        self.batch_size = batch_size
        self.use_noise = use_noise
        self.train_timer = Timer("Training function")
        self.grads_timer = Timer("Computing the grads")
        self.theano_function_mode = theano_function_mode
        self.weight_initializer = weight_initializer
        self.use_positional_encoding = use_positional_encoding
        self.use_inv_cost_mask = use_inv_cost_mask
        self.eps = 1e-8
        self.activ = activ
        self.out_layer_in = self.n_hids
        if bow_size is None:
            raise ValueError("bow_size should be specified.")
        if name is None:
            raise ValueError("name should not be empty.")
        self.reset()
        self.name = name

    def reset(self):
        """Drop all layers, parameters and pending updates."""
        self.children = []
        self.params = Parameters()
        self.bow_layer = None
        self.lstm_layer = None
        self.out_layer = None
        self.bowup_layer = None
        self.hint_layer = None
        self.updates = OrderedDict({})

    def build_model(self, use_noise=False, mdl_name=None):
        """Create missing layers, register children and merge parameters.

        When ``mdl_name`` is given, parameters are reloaded from it and
        pushed to every child.
        """
        if not self.bow_layer:
            self.bow_layer = BOWLayer(
                n_in=self.n_in,
                n_out=self.bow_size,
                seq_len=12,
                weight_initializer=self.weight_initializer,
                bias_initializer=self.bias_initializer,
                use_average=False,
                name=self.pname("bow_layer"))

        if self.deepout:
            self.deepout_layer_qbow = AffineLayer(
                n_in=self.bow_size,
                n_out=self.deepout,
                weight_initializer=self.weight_initializer,
                bias_initializer=self.bias_initializer,
                name=self.pname("deepout_qbow"))
            self.deepout_layer_ht = AffineLayer(
                n_in=self.n_hids,
                n_out=self.deepout,
                weight_initializer=self.weight_initializer,
                bias_initializer=self.bias_initializer,
                name=self.pname("deepout_ht"))
            self.out_layer_in = self.deepout

        if not self.bowup_layer:
            cnames = ["forget_below", "input_below", "out_below", "cell_below"]
            # A real list: fprop indexes self.cnames, which a lazy map
            # object would not support.
            self.cnames = [self.pname(cname) for cname in cnames]
            self.bowup_layer = ForkLayer(
                n_in=self.bow_size,
                n_outs=(self.n_hids,) * len(cnames),
                weight_initializer=self.weight_initializer,
                bias_initializer=self.bias_initializer,
                names=self.cnames)

        if not self.lstm_layer:
            self.lstm_layer = LSTMLayer(
                n_in=self.n_hids,
                n_out=self.n_hids,
                seq_len=self.seq_len,
                weight_initializer=self.weight_initializer,
                bias_initializer=self.bias_initializer,
                activ=self.activ,
                learn_init_state=self.learn_h0,
                name=self.pname("lstm_layer"))

        if not self.out_layer:
            self.out_layer = AffineLayer(
                n_in=self.out_layer_in,
                n_out=self.n_out,
                noise=self.noise,
                weight_initializer=self.weight_initializer,
                bias_initializer=self.bias_initializer,
                name=self.pname("ntm_out"))

        if not self.children:
            self.children.extend([self.bowup_layer,
                                  self.bow_layer,
                                  self.lstm_layer,
                                  self.out_layer])
            if self.deepout:
                self.children.extend([self.deepout_layer_qbow,
                                      self.deepout_layer_ht])
            self.merge_params()

        if mdl_name:
            logger.info("Reloading the model from %s. " % mdl_name)
            self.params.load(mdl_name)
            for child in self.children:
                child.use_params(self.params)

    def get_cost(self, use_noise=False, mdl_name=None):
        """Build the NLL cost and error expressions.

        Returns:
            The tuple ``(self.cost, self.errors)`` produced by ``nll``.
        """
        probs, _ = self.fprop(use_noise=use_noise, mdl_name=mdl_name)
        y = self.inps[1]
        cmask = None
        if self.use_cost_mask:
            cmask = self.inps[3]
        self.cost, self.errors = nll(y, probs, cost_mask=cmask)
        return self.cost, self.errors

    def get_train_fn(self, lr=None, mdl_name=None):
        """Compile and return the Theano training function.

        Args:
            lr: learning rate; defaults to ``self.eps`` when None.
            mdl_name: optional path of saved parameters to reload.
        """
        if lr is None:
            lr = self.eps

        cost, _ = self.get_cost(use_noise=self.use_noise, mdl_name=mdl_name)
        params = self.params.values

        logger.info("Computing the gradient graph.")
        self.grads_timer.start()
        grads = safe_grad(cost, params)
        gnorm = sum(grad.norm(2) for grad in grads.values())
        updates, norm_up, param_norm = self.learning_rule.get_updates(
            learning_rate=lr, grads=grads)
        self.grads_timer.stop()
        logger.info(self.grads_timer)

        if not self.updates:
            # dict.update() works in place and returns None; assigning its
            # result used to replace self.updates with None.
            self.updates.update(updates)

        outs = [self.cost, gnorm, norm_up, param_norm, self.errors]

        # The timer was previously stopped without ever being started.
        self.train_timer.start()
        train_fn = theano.function(self.inps,
                                   outs,
                                   updates=updates,
                                   mode=self.theano_function_mode,
                                   name=self.pname("train_fn"))
        self.train_timer.stop()
        logger.info(self.train_timer)
        return train_fn

    def get_inspect_fn(self, mdl_name=None):
        """Compile a function returning ``[h, probs]`` for inps[0], inps[2]."""
        logger.info("Compiling inspect function.")
        probs, h = self.fprop(use_noise=False, mdl_name=mdl_name)
        inspect_fn = theano.function([self.inps[0], self.inps[2]],
                                     [h, probs],
                                     name=self.pname("inspect_fn"))
        return inspect_fn

    def get_valid_fn(self, mdl_name=None):
        """Compile a noise-free function returning ``[cost, errors]``."""
        logger.info("Compiling validation function.")
        self.cost, self.errors = self.get_cost(use_noise=False,
                                               mdl_name=mdl_name)
        valid_fn = theano.function(self.inps,
                                   [self.cost, self.errors],
                                   name=self.pname("valid_fn"))
        return valid_fn

    def fprop(self, inps=None, use_mask=True, use_cmask=True,
              use_noise=False, mdl_name=None):
        """Build the forward graph.

        Returns:
            ``(self.probs, h)``: the softmax output and the LSTM hidden state.

        Raises:
            ValueError: if no cost mask is available (``use_cmask`` false).
        """
        self.build_model(use_noise=use_noise, mdl_name=mdl_name)

        if not inps:
            inps = self.inps

        X = inps[0]

        # Bind the masks up front so that a disabled mask reaches the
        # explicit ValueError below instead of raising NameError.
        mask = cmask = qmask = None
        if use_mask:
            mask = inps[2]
        if use_cmask:
            cmask = inps[3]
            qmask = inps[4]

        assert (3 + sum([use_mask, use_cmask])) == len(inps), \
            "inputs have illegal shape."

        if cmask is None:
            raise ValueError("Mask for the answers should not be empty.")
        m = mask * TT.eq(cmask.reshape((cmask.shape[0], cmask.shape[1])), 0)

        bow_out = self.bow_layer.fprop(X,
                                       amask=m,
                                       qmask=qmask,
                                       deterministic=not use_noise)
        # Shift the embeddings by one step along axis 0 and zero the first.
        new_bow = TT.roll(bow_out, 1, axis=0)
        new_bow = TT.set_subtensor(new_bow[0], as_floatX(0))
        bow_outs = self.bowup_layer.fprop(bow_out, deterministic=not use_noise)

        gate_shp = (X.shape[1], X.shape[2], -1)
        # Order follows self.cnames: forget, input, output, cell.
        lstm_inps = [bow_outs[cname].reshape(gate_shp)
                     for cname in self.cnames[:4]]

        h, c = self.lstm_layer.fprop(inps=lstm_inps,
                                     mask=mask,
                                     batch_size=self.batch_size)

        if self.deepout:
            h_deepout = self.deepout_layer_ht.fprop(h)
            emb_deepout = self.deepout_layer_qbow.fprop(new_bow)
            z = Leaky_Rect(h_deepout + emb_deepout, 0.01)
        else:
            z = h

        # Both branches applied the same dropout; do it once.
        if self.dropout:
            dropOp = Dropout(dropout_prob=self.dropout)
            z = dropOp(z, deterministic=not use_noise)

        out_layer = self.out_layer.fprop(z, deterministic=not use_noise)
        self.probs = Softmax(out_layer)
        return self.probs, h
class TrainMeter(object):
    """Measures training stats."""

    def __init__(self, epoch_iters):
        """Create the meters for an epoch of ``epoch_iters`` iterations."""
        self.epoch_iters = epoch_iters
        self.max_iter = cfg.OPTIM.MAX_EPOCH * epoch_iters
        self.iter_timer = Timer()
        self.desc_loss = ScalarMeter(cfg.LOG_PERIOD)
        self.desc_loss_total = 0.0
        self.att_loss = ScalarMeter(cfg.LOG_PERIOD)
        self.att_loss_total = 0.0
        self.lr = None
        # Current minibatch errors (smoothed over a window)
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_att_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_att_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Number of misclassified examples
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_att_top1_mis = 0
        self.num_att_top5_mis = 0
        self.num_samples = 0

    def reset(self, timer=False):
        """Clear all accumulated stats; also reset the timer if ``timer``."""
        if timer:
            self.iter_timer.reset()
        self.desc_loss.reset()
        self.att_loss.reset()
        self.desc_loss_total = 0.0
        self.att_loss_total = 0.0
        self.lr = None
        self.mb_top1_err.reset()
        self.mb_top5_err.reset()
        # Reset in place like the other meters instead of allocating
        # replacement ScalarMeter objects.
        self.mb_att_top1_err.reset()
        self.mb_att_top5_err.reset()
        self.num_top1_mis = 0
        self.num_top5_mis = 0
        self.num_att_top1_mis = 0
        self.num_att_top5_mis = 0
        self.num_samples = 0

    def iter_tic(self):
        """Start timing one iteration."""
        self.iter_timer.tic()

    def iter_toc(self):
        """Stop timing the current iteration."""
        self.iter_timer.toc()

    def update_stats(self, desc_top1_err, desc_top5_err, att_top1_err,
                     att_top5_err, desc_loss, att_loss, lr, mb_size):
        """Record one minibatch.

        Error and loss values feed the windowed meters as given and are
        weighted by ``mb_size`` in the per-epoch aggregates.
        """
        # Current minibatch stats
        self.mb_top1_err.add_value(desc_top1_err)
        self.mb_top5_err.add_value(desc_top5_err)
        self.desc_loss.add_value(desc_loss)
        self.mb_att_top1_err.add_value(att_top1_err)
        self.mb_att_top5_err.add_value(att_top5_err)
        self.att_loss.add_value(att_loss)
        self.lr = lr
        # Aggregate stats
        self.num_top1_mis += desc_top1_err * mb_size
        self.num_top5_mis += desc_top5_err * mb_size
        self.num_att_top1_mis += att_top1_err * mb_size
        self.num_att_top5_mis += att_top5_err * mb_size
        self.desc_loss_total += desc_loss * mb_size
        self.att_loss_total += att_loss * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        """Return a dict of windowed stats for the given iteration."""
        cur_iter_total = cur_epoch * self.epoch_iters + cur_iter + 1
        eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
        mem_usage = gpu_mem_usage()
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.epoch_iters),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "eta": time_string(eta_sec),
            "desc_top1_err": self.mb_top1_err.get_win_median(),
            "desc_top5_err": self.mb_top5_err.get_win_median(),
            "desc_loss": self.desc_loss.get_win_median(),
            "att_top1_err": self.mb_att_top1_err.get_win_median(),
            "att_top5_err": self.mb_att_top5_err.get_win_median(),
            "att_loss": self.att_loss.get_win_median(),
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_iter_stats(self, cur_epoch, cur_iter):
        """Log iteration stats once every ``cfg.LOG_PERIOD`` iterations."""
        if (cur_iter + 1) % cfg.LOG_PERIOD != 0:
            return
        stats = self.get_iter_stats(cur_epoch, cur_iter)
        logger.info(logging.dump_log_data(stats, "train_iter"))

    def get_epoch_stats(self, cur_epoch):
        """Return a dict of per-sample averages over the whole epoch."""
        cur_iter_total = (cur_epoch + 1) * self.epoch_iters
        eta_sec = self.iter_timer.average_time * (self.max_iter - cur_iter_total)
        mem_usage = gpu_mem_usage()
        # With no samples recorded every total is still zero, so dividing
        # by 1 yields zeros instead of raising ZeroDivisionError.
        num_samples = max(self.num_samples, 1)
        stats = {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "time_avg": self.iter_timer.average_time,
            "eta": time_string(eta_sec),
            "desc_top1_err": self.num_top1_mis / num_samples,
            "desc_top5_err": self.num_top5_mis / num_samples,
            "desc_loss": self.desc_loss_total / num_samples,
            "att_top1_err": self.num_att_top1_mis / num_samples,
            "att_top5_err": self.num_att_top5_mis / num_samples,
            "att_loss": self.att_loss_total / num_samples,
            "lr": self.lr,
            "mem": int(np.ceil(mem_usage)),
        }
        return stats

    def log_epoch_stats(self, cur_epoch):
        """Log the epoch-level stats."""
        stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(stats, "train_epoch"))
def __init__(self):
    """Create the components held by this object.

    Builds, in this order, a ``Timer``, a ``Scheduler`` and a ``RAM``
    instance, then starts with an empty process list and a one-element
    CPU list holding ``CPU(1)``.
    """
    # Right-hand sides are evaluated left to right, so the constructors
    # still run in the order Timer, Scheduler, RAM.
    self.timer, self.scheduler, self.memory = Timer(), Scheduler(), RAM()
    # No processes are registered yet.
    self.processes = []
    first_cpu = CPU(1)
    self.cpus = [first_cpu]
class NTMModel(Model): """ NTM model. """ def __init__(self, n_in, n_hids, n_out, mem_size, mem_nel, deep_out_size, bow_size=40, inps=None, dropout=None, predict_bow_out=False, seq_len=None, n_read_heads=1, n_layers=1, n_write_heads=1, train_profile=False, erase_activ=None, content_activ=None, l1_pen=None, l2_pen=None, use_reinforce=False, use_reinforce_baseline=False, n_reading_steps=2, use_gru_inp_rep=False, use_simple_rnn_inp_rep=False, use_nogru_mem2q=False, sub_mb_size=40, lambda1_rein=2e-4, lambda2_rein=2e-5, baseline_reg=1e-2, anticorrelation=None, use_layer_norm=False, recurrent_dropout_prob=-1, correlation_ws=None, hybrid_att=True, max_fact_len=7, use_dice_val=False, use_qmask=False, renormalization_scale=4.8, w2v_embed_scale=0.42, emb_scale=0.32, use_soft_att=False, use_hard_att_eval=False, use_batch_norm=False, learning_rule=None, use_loc_based_addressing=True, smoothed_diff_weights=False, use_multiscale_shifts=True, use_ff_controller=False, use_gate_quad_interactions=False, permute_order=False, wpenalty=None, noise=None, w2v_embed_path=None, glove_embed_path=None, learn_embeds=True, use_last_hidden_state=False, use_adv_indexing=False, use_bow_input=True, use_out_mem=True, use_deepout=True, use_q_mask=False, use_inp_content=True, rnd_indxs=None, address_size=0, learn_h0=False, use_context=False, debug=False, controller_activ=None, mem_gater_activ=None, weight_initializer=None, bias_initializer=None, use_cost_mask=True, use_bow_cost_mask=True, theano_function_mode=None, batch_size=32, use_noise=False, reinforce_decay=0.9, softmax=False, use_mask=False, name="ntm_model", **kwargs): assert deep_out_size is not None, ("Size of the deep output " " should not be None.") if sub_mb_size is None: sub_mb_size = batch_size assert sub_mb_size <= batch_size, "batch_size should be greater than sub_mb_size" self.hybrid_att = hybrid_att self.state = locals() self.use_context = use_context self.eps = 1e-8 self.use_mask = use_mask self.l1_pen = l1_pen self.l2_pen = l2_pen 
self.l2_penalizer = None self.emb_scale = emb_scale self.w2v_embed_path = w2v_embed_path self.glove_embed_path = glove_embed_path self.learn_embeds = learn_embeds self.exclude_params = {} self.use_gate_quad_interactions = use_gate_quad_interactions self.reinforce_decay = reinforce_decay self.max_fact_len = max_fact_len self.lambda1_reinf = lambda1_rein self.lambda2_reinf = lambda2_rein self.use_reinforce_baseline = use_reinforce_baseline self.use_reinforce = use_reinforce self.use_gru_inp_rep = use_gru_inp_rep self.use_simple_rnn_inp_rep = use_simple_rnn_inp_rep self.use_q_mask = use_q_mask self.use_inp_content = use_inp_content self.rnd_indxs = rnd_indxs self.use_layer_norm = use_layer_norm self.recurrent_dropout_prob = recurrent_dropout_prob self.n_reading_steps = n_reading_steps self.sub_mb_size = sub_mb_size self.predict_bow_out = predict_bow_out self.correlation_ws = correlation_ws self.smoothed_diff_weights = smoothed_diff_weights self.use_soft_att = use_soft_att self.use_hard_att_eval = use_hard_att_eval if anticorrelation and n_read_heads < 2: raise ValueError("Anti-correlation of the attention weight" " do not support the multiple read heads.") self.anticorrelation = anticorrelation if self.predict_bow_out: if len(inps) <= 4: raise ValueError( "The number of inputs should be greater than 4.") if l2_pen: self.l2_penalizer = L2Penalty(self.l2_pen) #assert use_bow_input ^ use_gru_inp_rep ^ self.use_simple_rnn_inp_rep, \ # "You should either use GRU or BOW input." 
self.renormalization_scale = renormalization_scale self.w2v_embed_scale = w2v_embed_scale self.baseline_reg = baseline_reg self.inps = inps self.erase_activ = erase_activ self.use_ff_controller = use_ff_controller self.content_activ = content_activ self.use_bow_cost_mask = use_bow_cost_mask self.ntm_outs = None self.theano_function_mode = theano_function_mode self.n_in = n_in self.dropout = dropout self.wpenalty = wpenalty self.noise = noise self.bow_size = bow_size self.use_last_hidden_state = use_last_hidden_state self.use_loc_based_addressing = use_loc_based_addressing self.train_profile = train_profile self.use_nogru_mem2q = use_nogru_mem2q self.use_qmask = use_qmask self.permute_order = permute_order self.use_batch_norm = use_batch_norm # Use this if you have a ff-controller because otherwise this is not effective: self.n_layers = n_layers if self.use_reinforce: reinforceCls = REINFORCE if not self.use_reinforce_baseline: reinforceCls = REINFORCEBaselineExt self.Reinforce = reinforceCls(lambda1_reg=self.lambda1_reinf, lambda2_reg=self.lambda2_reinf, decay=self.reinforce_decay) self.ReaderReinforce = \ ReinforcePenalty(reinf_level=self.lambda1_reinf, maxent_level=self.lambda2_reinf, use_reinforce_baseline=self.use_reinforce_baseline) self.dice_val = None if use_dice_val: self.dice_val = sharedX(1.) 
self.use_dice_val = use_dice_val if bow_size is None: raise ValueError("bow_size should be specified.") if name is None: raise ValueError("name should not be empty.") self.n_hids = n_hids self.mem_size = mem_size self.use_deepout = use_deepout self.mem_nel = mem_nel self.n_out = n_out self.use_out_mem = use_out_mem self.use_multiscale_shifts = use_multiscale_shifts self.address_size = address_size self.n_read_heads = n_read_heads self.n_write_heads = n_write_heads self.learn_h0 = learn_h0 self.use_adv_indexing = use_adv_indexing self.softmax = softmax self.use_bow_input = use_bow_input self.use_cost_mask = use_cost_mask self.deep_out_size = deep_out_size self.controller_activ = controller_activ self.mem_gater_activ = mem_gater_activ self.weight_initializer = weight_initializer self.bias_initializer = bias_initializer if batch_size: self.batch_size = batch_size else: self.batch_size = inps[0].shape[1] #assert self.batch_size >= self.sub_mb_size, ("Minibatch size should be " # " greater than the sub minibatch size") self.comp_grad_fn = None self.name = name self.use_noise = use_noise self.train_timer = Timer("Training function") self.gradfn_timer = Timer("Gradient function") self.grads_timer = Timer("Computing the grads") self.reset() self.seq_len = TT.iscalar('seq_len') self.__convert_inps_to_list() if debug: if self.use_gru_inp_rep or self.use_bow_input: self.seq_len.tag.test_value = self.inps[ 0].tag.test_value.shape[1] else: self.seq_len.tag.test_value = self.inps[ 0].tag.test_value.shape[0] self.learning_rule = learning_rule if self.predict_bow_out: self.bow_out_w = TT.fscalar("bow_out_w") if debug: self.bow_out_w.tag.test_value = np.float32(1.0) else: self.bow_out_w = 0 def __convert_inps_to_list(self): if isinstance(self.inps, list): X = self.inps[0] y = self.inps[1] if self.use_mask: mask = self.inps[2] cmask = None inps = [X, y] if self.use_mask: inps += [mask] if self.use_cost_mask: cmask = self.inps[3] inps += [cmask] if self.correlation_ws or 
self.use_qmask: self.qmask = self.inps[5] inps += [self.qmask] if self.predict_bow_out: bow_out = self.inps[4] inps += [bow_out] self.inps = inps else: X = self.inps['X'] y = self.inps['y'] mask = self.inps['mask'] cmask = None inps = [X, y] if self.use_mask: inps += [mask] if self.use_cost_mask: cmask = self.inps['cmask'] inps += [cmask] if self.correlation_ws or self.use_qmask: self.qmask = self.inps['qmask'] inps += [self.qmask] if self.predict_bow_out: bow_out = self.inps['bow_out'] inps += [bow_out] self.inps = inps def reset(self): self.params = Parameters() if self.w2v_embed_path and (self.use_bow_input or self.use_gru_inp_rep): self.w2v_embeds = pkl.load(open(self.w2v_embed_path, "rb")) if self.glove_embed_path: logger.info("Loading the GLOVE embeddings...") self.glove_embeds = pkl.load(open(self.glove_embed_path, "rb")) self.reg = 0 self.ntm = None self.merge_layer = None self.out_layer = None self.bow_layer = None self.baseline_out = None self.bow_pred_out = None self.gru_fact_layer_inps = None self.gru_fact_layer = None self.rnn_fact_layer_inps = None self.rnn_fact_layer = None self.bow_out_layer = None self.inp_proj_layer = None self.batch_norm_layer = None self.children = [] self.trainpartitioner = None self.known_grads = OrderedDict({}) self.updates = OrderedDict({}) def __init_to_embeds(self, layer, params, embeds, scale=0.42): logger.info("Initializing to word2vec embeddings.") if not isinstance(params, list): params = [params] for pp in params: pv = pp.get_value() for i, v in embeds.items(): pv[i] = scale * v layer.params[pp.name] = pv def __init_glove_embeds(self, layer, params, embeds): logger.info("Initializing to GLOVE embeddings.") if not isinstance(params, list): params = [params] glove_embs = self.emb_scale * embeds.astype("float32") mean = glove_embs.mean() std = glove_embs.std() token_embs = np.random.normal(loc=mean, scale=std, size=(2, 300)) token_embs = np.concatenate([token_embs, glove_embs], axis=0) for pp in params: 
self.exclude_params[pp.name] = 1 layer.params[pp.name] = token_embs.astype( "float32") #, name=pp.name) def build_model(self, use_noise=False, mdl_name=None): if self.use_ff_controller: cls = NTMFFController else: cls = NTM if use_noise: mem_gater_activ = lambda x: self.mem_gater_activ( x, use_noise=use_noise) if self.use_bow_input and not self.bow_layer and not self.use_gru_inp_rep: self.bow_layer = BOWLayer( n_in=self.n_in, n_out=self.bow_size, seq_len=self.max_fact_len, weight_initializer=self.weight_initializer, bias_initializer=self.bias_initializer, use_average=False, name=self.pname("bow_layer")) if self.w2v_embed_path: fparams = self.bow_layer.params.lfilterby("weight") self.__init_to_embeds(self.bow_layer, fparams, self.w2v_embeds, scale=self.w2v_embed_scale) elif self.use_gru_inp_rep: if not self.gru_fact_layer_inps: low_cnames = [ "low_reset_below", "low_gater_below", "low_state_below" ] lnfout = len(low_cnames) self.low_cnames = map(lambda x: self.pname(x), low_cnames) self.gru_fact_layer_inps = ForkLayer( n_in=self.n_in, n_outs=tuple([self.bow_size for i in xrange(lnfout)]), weight_initializer=self.weight_initializer, use_bias=False, names=self.low_cnames) if self.w2v_embed_path: fparams = self.gru_fact_layer_inps.params.lfilterby( "weight") self.__init_to_embeds(self.gru_fact_layer_inps, fparams, self.w2v_embeds) if not self.gru_fact_layer: self.gru_fact_layer = GRULayer( n_in=self.bow_size, n_out=self.bow_size, seq_len=self.max_fact_len, weight_initializer=self.weight_initializer, bias_initializer=self.bias_initializer, activ=Tanh, learn_init_state=self.learn_h0, name=self.pname("gru_fact_layer")) elif self.use_simple_rnn_inp_rep: if not self.rnn_fact_layer_inps: self.rnn_fact_layer_inps = AffineLayer( n_in=self.n_in, n_out=self.bow_size, weight_initializer=self.weight_initializer, bias_initializer=self.bias_initializer, name=self.pname("rnn_fact_layer_inps")) if self.w2v_embed_path: fparams = self.rnn_fact_layer_inps.params.lfilterby( "weight") 
self.__init_to_embeds(self.rnn_fact_layer_inps, fparams, self.w2v_embeds) if not self.rnn_fact_layer: self.rnn_fact_layer = RNNLayer( n_in=self.n_in, n_out=self.bow_size, seq_len=self.max_fact_len, weight_initializer=self.weight_initializer, bias_initializer=self.bias_initializer, activ=Rect, learn_init_state=self.learn_h0, name=self.pname("rnn_fact_layer")) else: if not self.inp_proj_layer: self.inp_proj_layer = AffineLayer( n_in=self.n_in, n_out=self.bow_size, weight_initializer=self.weight_initializer, use_bias=False, bias_initializer=self.bias_initializer, name=self.pname("ntm_inp_proj_layer")) if self.glove_embed_path: fparams = self.inp_proj_layer.params.lfilterby("weight") self.__init_glove_embeds(self.inp_proj_layer, fparams, self.glove_embeds) if self.predict_bow_out and not self.bow_out_layer: self.bow_out_layer = AffineLayer( n_in=self.n_hids, n_out=self.n_out, weight_initializer=self.weight_initializer, noise=self.noise, wpenalty=self.wpenalty, bias_initializer=self.bias_initializer, name=self.pname("bow_out_layer")) if self.use_batch_norm and not self.batch_norm_layer: self.batch_norm_layer = BatchNormLayer( n_in=self.bow_size, n_out=self.bow_size, name=self.pname("batch_norm_inp")) if not self.ntm: inp = self.inps[0] bs = inp.shape[1] if inp.ndim == 4: bs = inp.shape[2] self.ntm = cls( n_in=self.bow_size, n_hids=self.n_hids, l1_pen=self.l1_pen, learn_h0=self.learn_h0, hybrid_att=self.hybrid_att, smoothed_diff_weights=self.smoothed_diff_weights, use_layer_norm=self.use_layer_norm, recurrent_dropout_prob=self.recurrent_dropout_prob, use_bow_input=self.use_bow_input, use_loc_based_addressing=self.use_loc_based_addressing, use_reinforce=self.use_reinforce, erase_activ=self.erase_activ, content_activ=self.content_activ, mem_nel=self.mem_nel, address_size=self.address_size, use_context=self.use_context, n_read_heads=self.n_read_heads, use_soft_att=self.use_soft_att, use_hard_att_eval=self.use_hard_att_eval, use_inp_content=self.use_inp_content, 
n_write_heads=self.n_write_heads, dice_val=self.dice_val, mem_size=self.mem_size, use_nogru_mem2q=self.use_nogru_mem2q, use_gru_inp_rep=self.use_gru_inp_rep, weight_initializer=self.weight_initializer, use_adv_indexing=self.use_adv_indexing, wpenalty=self.wpenalty, noise=self.noise, n_layers=self.n_layers, bias_initializer=self.bias_initializer, use_quad_interactions=self.use_gate_quad_interactions, controller_activ=self.controller_activ, mem_gater_activ=self.mem_gater_activ, batch_size=self.batch_size if self.batch_size else None, use_multiscale_shifts=self.use_multiscale_shifts, n_reading_steps=self.n_reading_steps, seq_len=self.seq_len, name=self.pname("ntm"), use_noise=use_noise) if not self.merge_layer and self.use_deepout: self.merge_layer = MergeLayer( n_ins=[self.n_hids, self.mem_size], n_out=self.deep_out_size, weight_initializer=self.weight_initializer, bias_initializer=self.bias_initializer, names=[self.pname("deep_controller"), self.pname("deep_mem")]) if self.use_deepout: out_layer_in = self.deep_out_size else: out_layer_in = self.n_hids if self.use_out_mem: self.out_mem = AffineLayer( n_in=self.mem_size + self.address_size, n_out=self.n_out, weight_initializer=self.weight_initializer, wpenalty=self.wpenalty, noise=self.noise, bias_initializer=self.bias_initializer, name=self.pname("out_mem")) self.out_scaler = AffineLayer( n_in=self.n_hids, n_out=1, weight_initializer=self.weight_initializer, wpenalty=self.wpenalty, noise=self.noise, bias_initializer=self.bias_initializer, name=self.pname("out_scaler")) if not self.out_layer: self.out_layer = AffineLayer( n_in=out_layer_in, n_out=self.n_out, wpenalty=self.wpenalty, noise=self.noise, weight_initializer=self.weight_initializer, bias_initializer=self.bias_initializer, name=self.pname("out")) if self.ntm.updates: self.updates.update(self.ntm.updates) if not self.use_reinforce_baseline and self.use_reinforce: self.baseline_out = AffineLayer( n_in=self.n_hids, n_out=1, 
weight_initializer=self.weight_initializer, bias_initializer=self.bias_initializer, init_bias_val=1e-3, name=self.pname("baseline_out")) if not self.children: self.children.append(self.ntm) if self.use_deepout and self.merge_layer: self.children.append(self.merge_layer) self.children.append(self.out_layer) if self.use_out_mem: self.children.extend([self.out_mem, self.out_scaler]) if self.use_bow_input and self.bow_layer and not self.use_gru_inp_rep: self.children.append(self.bow_layer) elif self.use_gru_inp_rep: self.children.extend( [self.gru_fact_layer_inps, self.gru_fact_layer]) elif self.use_simple_rnn_inp_rep: self.children.extend( [self.rnn_fact_layer_inps, self.rnn_fact_layer]) else: self.children.append(self.inp_proj_layer) if self.predict_bow_out and self.bow_out_layer: self.children.append(self.bow_out_layer) if self.use_reinforce and not self.use_reinforce_baseline: self.children.append(self.baseline_out) if self.use_batch_norm: self.children.append(self.batch_norm_layer) self.merge_params() if self.renormalization_scale: self.params.renormalize_params( nscale=self.renormalization_scale, exclude_params=self.exclude_params) if mdl_name: if os.path.exists(mdl_name): logger.info("Reloading model from %s." 
% mdl_name) self.params.load(mdl_name) [child.use_params(self.params) for child in self.children] else: warnings.warn( "The model file does not exist and could not load it.") if self.trainpartitioner is None and self.sub_mb_size: self.trainpartitioner = MinibatchGradPartitioner( self.params, self.sub_mb_size, self.batch_size, seq_len=self.seq_len)
# ---------------------------------------------------------------------------
# get_cost(use_noise=False, valid_only=False, mdl_name=None)
# Calls self.fprop(...) for the output probabilities, reads X / y / mask /
# cmask from self.inps (list or dict form) and stores the loss in self.cost
# and self.errors: kl(...) with errors = 0 when self.softmax is false,
# nll(...) otherwise. self.cost_mon holds the mean cost (summed over axis 0
# first when the cost is 2-D) and replaces self.cost when valid_only is set.
# L1 / L2 penalties are accumulated into self.reg only when valid_only is
# false. With use_reinforce the cost is passed through self.Reinforce and
# self.ReaderReinforce. Returns (self.cost, self.errors, bow_cost).
# NOTE(review): the original indentation of this snippet is lost; the
#   statement nesting has to be confirmed against the real source file.
# NOTE(review): in the list branch `mask` is bound only when self.use_mask is
#   true, yet it is read later (bcmask, mask=mask) -- confirm it cannot be
#   unbound on those paths.
# NOTE(review): the dict branch reads self.inps['x'] while fprop reads
#   inps['X']; the two keys look inconsistent.
# NOTE(review): `bow_cost != 0` appears twice in the same condition.
# ---------------------------------------------------------------------------
def get_cost(self, use_noise=False, valid_only=False, mdl_name=None): probs, _ = self.fprop(use_noise=use_noise, mdl_name=mdl_name) if isinstance(self.inps, list): X = self.inps[0] y = self.inps[1] if self.use_mask: mask = self.inps[2] cmask = None if self.use_cost_mask: cmask = self.inps[3] else: X = self.inps['x'] y = self.inps['y'] mask = self.inps['mask'] cmask = None if self.use_cost_mask: cmask = self.inps['cmask'] if self.l1_pen and self.l1_pen > 0 and not valid_only: self.reg += self.ntm.reg if self.l2_pen and not valid_only: self.l2_penalizer.penalize_layer_weights(self.out_layer) self.l2_penalizer.penalize_params( self.ntm.params.filterby("init_state").values[0]) self.l2_penalizer.penalize_params( self.ntm.controller.params.filterby("weight").values[0]) if not self.use_ff_controller: self.l2_penalizer.penalize_params( self.ntm.controller.params.filterby( "state_before_ht").values[0]) self.reg += self.l2_penalizer.get_penalty_level() if not self.softmax: self.cost = kl(y, probs, cost_mask=cmask) self.errors = 0 else: if not self.use_last_hidden_state: self.cost, self.errors = nll(y, probs, cost_mask=cmask) else: self.cost, self.errors = nll(y, probs) if self.cost.ndim == 2: self.cost_mon = self.cost.sum(0).mean() if valid_only: self.cost = self.cost_mon else: self.cost_mon = self.cost.mean() if valid_only: self.cost = self.cost_mon bow_cost = 0 if not valid_only: bow_cost_shifted = 0 if self.predict_bow_out and self.bow_pred_out and self.bow_out_layer: bow_target = self.inps[-1] bcmask = mask * TT.cast(TT.eq(cmask, 0), "float32") sum_tru_time = False cost_matrix = True if self.use_reinforce and \ not sum_tru_time 
else False batch_vec = True if self.use_reinforce else False bow_cost = self.bow_out_w * kl(bow_target, self.bow_pred_out, batch_vec=batch_vec, sum_tru_time=sum_tru_time, cost_matrix=cost_matrix, cost_mask=bcmask, normalize_by_outsize=True) if cost_matrix: bow_cost_shifted = TT.zeros_like(bow_cost) bow_cost_shifted = TT.set_subtensor(bow_cost_shifted[1:], \ bow_cost[:-1]) else: bow_cost_shifted = bow_cost self.center = 0 self.cost_std = 1 if self.use_reinforce and self.use_reinforce_baseline: self.cost_mon = self.cost if not self.use_mask: mask = None self.updates, self.known_grads, self.baseline, cost_std, \ self.write_policy, maxent_level = self.Reinforce(probs=self.write_weights, samples=self.w_samples, updates=self.updates, cost=(1 - self.bow_out_w) * self.cost + bow_cost_shifted, mask=mask) maxent_level = self.lambda2_reinf elif self.use_reinforce: if "float" in X.dtype: self.baseline = self.baseline_out.fprop( self.ntm_outs[0]).reshape( (X.shape[0], X.shape[1])).dimshuffle(0, 1, 'x') else: self.baseline = self.baseline_out.fprop( self.ntm_outs[0]).reshape((X.shape[1], X.shape[2], -1)) mask_ = None mask = None if self.use_mask: if mask: mask_ = mask if mask.ndim == 2: mask_ = mask.dimshuffle(0, 1, 'x') self.baseline = mask_ * self.baseline if not self.softmax: self.cost = kl(y, probs, cost_mask=cmask, cost_matrix=True) self.errors = 0 else: self.cost, self.errors = nll(y, probs, cost_mask=cmask, cost_matrix=True) self.updates, self.known_grads, self.center, self.cost_std, \ self.write_policy, maxent_level = \ self.Reinforce(probs=self.write_weights, samples=self.w_samples, baseline=self.baseline, updates=self.updates, cost=(1 - self.bow_out_w) * self.cost + \ bow_cost_shifted, mask=mask) if self.cost.ndim == 2: hcost = self.cost.sum(0).dimshuffle('x', 0, 'x') else: hcost = self.cost.dimshuffle(0, 'x', 'x') base_reg = huber_loss(y_hat=self.baseline, target=block_gradient(hcost), center=block_gradient(self.center), std=block_gradient(self.cost_std)) if 
self.cost.ndim == 2: self.cost_mon = self.cost.sum(0).mean() else: self.cost_mon = self.cost.mean() if mask_: base_reg = mask_ * base_reg self.base_reg = self.baseline_reg * base_reg.sum(0).mean() self.reg += self.base_reg if self.use_reinforce: self.ReaderReinforce.maxent_level = maxent_level self.read_constraint, self.read_policy = \ self.ReaderReinforce(baseline=self.baseline, cost=self.cost + bow_cost, probs=self.read_weights, samples=self.r_samples, mask=mask, center=self.center, cost_std=self.cost_std) if self.cost.ndim == 2: self.cost = self.cost.sum(0).mean() else: self.cost = self.cost.mean() if bow_cost != 0 and bow_cost.ndim >= 1 and bow_cost != 0: bow_cost = bow_cost.sum(0).mean() if self.predict_bow_out and bow_cost: self.cost = (1 - self.bow_out_w) * self.cost + bow_cost if self.use_reinforce and self.read_constraint: self.cost += self.read_constraint if self.reg: self.cost += self.reg return self.cost, self.errors, bow_cost
# ---------------------------------------------------------------------------
# get_inspect_fn(mdl_name=None)
# Runs fprop with use_noise=False and compiles a theano.function over
# [self.inps[0], self.inps[2], self.inps[3], self.seq_len] whose outputs are
# ntm_outs + [probs]; returns the compiled function.
# NOTE(review): the local `updates` dict is filled but never used -- the
#   function is compiled with updates=self.ntm.updates.
# ---------------------------------------------------------------------------
def get_inspect_fn(self, mdl_name=None): logger.info("Compiling inspect function.") probs, ntm_outs = self.fprop(use_noise=False, mdl_name=mdl_name) updates = OrderedDict({}) if self.ntm.updates and self.use_reinforce: updates.update(self.ntm.updates) inspect_fn = theano.function( [self.inps[0], self.inps[2], self.inps[3], self.seq_len], ntm_outs + [probs], updates=self.ntm.updates, name=self.pname("inspect_fn")) return inspect_fn
# ---------------------------------------------------------------------------
# get_valid_fn(mdl_name=None)
# Compiles and returns the validation theano.function over
# inps + [self.seq_len]. With self.softmax the outputs are [cost, errors] from
# get_cost(use_noise=True, valid_only=True); otherwise [cost] from
# get_cost(use_noise=False). The last input is dropped from `inps` when its
# name is "bow_out".
# NOTE(review): `inps` is assigned only inside the predict_bow_out /
#   bow_out_layer conditional; check that every path binds it before
#   theano.function is called.
# ---------------------------------------------------------------------------
def get_valid_fn(self, mdl_name=None): logger.info("Compiling validation function.") if self.predict_bow_out or self.bow_out_layer: if self.inps[-1].name == "bow_out": inps = self.inps[:-1] else: inps = self.inps if self.softmax: cost, errors, _ = self.get_cost(use_noise=True, valid_only=True, mdl_name=mdl_name) if self.ntm.updates: self.updates.update(self.ntm.updates) valid_fn = theano.function(inps + [self.seq_len], [cost, errors], updates=self.ntm.updates, on_unused_input='warn', name=self.pname("valid_fn")) else: cost, _, _ = self.get_cost(use_noise=False, mdl_name=mdl_name) if self.ntm.updates: 
self.updates.update(self.ntm.updates) valid_fn = theano.function(inps + [self.seq_len], [cost], updates=self.ntm.updates, on_unused_input='warn', name=self.pname("valid_fn")) return valid_fn
# ---------------------------------------------------------------------------
# add_noise_to_params()
# For every (key, shared value) pair in self.params.__dict__['params'], draws
# global_rng.normal(0, 0.05, shape) noise and stores value + noise back into
# self.params under the same key. Relies on dict.iteritems() (Python 2).
# ---------------------------------------------------------------------------
def add_noise_to_params(self): for k, v in self.params.__dict__['params'].iteritems(): v_np = v.get_value(borrow=True) noise = global_rng.normal(0, 0.05, v_np.shape) self.params[k] = v_np + noise
# ---------------------------------------------------------------------------
# get_train_fn(lr=None, mdl_name=None)
# Builds the cost with get_cost(use_noise=True), computes gradients through
# safe_grad(..., known_grads=self.known_grads), asks
# self.learning_rule.get_updates for the parameter updates and compiles and
# returns the training theano.function over inps + [self.seq_len].
# lr defaults to self.eps. Outputs start with [cost_mon or cost, gnorm,
# norm_up, param_norm] and are extended with errors / bow_cost / REINFORCE
# terms depending on the softmax, predict_bow_out and use_reinforce flags.
# When self.train_profile is set the process exits via sys.exit(-1) after
# compilation.
# NOTE(review): bow_cost is bound only in the softmax branch but is appended
#   to `outs` whenever predict_bow_out is set.
# NOTE(review): params.pop(0) mutates the list obtained from
#   self.params.values when learn_embeds is false -- confirm that is a copy.
# ---------------------------------------------------------------------------
def get_train_fn(self, lr=None, mdl_name=None): if lr is None: lr = self.eps if self.softmax: cost, errors, bow_cost = self.get_cost(use_noise=True, mdl_name=mdl_name) else: cost, _, _ = self.get_cost(use_noise=True, mdl_name=mdl_name) params = self.params.values logger.info("Computing the gradients.") self.grads_timer.start() inps = self.inps if self.predict_bow_out: inps = self.inps + [self.bow_out_w] if not self.learn_embeds: params.pop(0) grads = safe_grad(cost, params, known_grads=self.known_grads) self.grads_timer.stop() logger.info(self.grads_timer) logger.info("Compiling grad fn.") self.gradfn_timer.start() if self.sub_mb_size: if self.sub_mb_size != self.batch_size: self.comp_grad_fn, grads = self.trainpartitioner.get_compute_grad_fn( grads, self.ntm.updates, inps) gnorm = sum(grad.norm(2) for _, grad in grads.iteritems()) updates, norm_up, param_norm = self.learning_rule.get_updates( learning_rate=lr, grads=grads) self.gradfn_timer.stop() logger.info(self.gradfn_timer) if self.updates: self.updates.update(updates) else: self.updates = updates warnings.warn("WARNING: Updates are empty.") logger.info("Compiling the training function.") self.train_timer.start() if hasattr(self, "cost_mon"): outs = [self.cost_mon, gnorm, norm_up, param_norm] else: outs = [cost, gnorm, norm_up, param_norm] if self.softmax: outs += [self.errors] if self.predict_bow_out: outs += [bow_cost] if self.use_reinforce: outs += [self.read_constraint, self.baseline, self.read_policy, \ self.write_policy] if not self.use_reinforce_baseline: outs += [self.center, self.cost_std, self.base_reg] if 
self.use_batch_norm: self.updates.update(self.batch_norm_layer.updates) train_fn = theano.function(inps + [self.seq_len], outs, updates=self.updates, mode=self.theano_function_mode, name=self.pname("train_fn")) self.train_timer.stop() logger.info(self.train_timer) if self.train_profile: import sys sys.exit(-1) return train_fn
# ---------------------------------------------------------------------------
# fprop(inps=None, leak_rate=0.05, use_noise=False, mdl_name=None)
# Calls self.build_model(...), takes X / y / mask / cmask from `inps`
# (self.inps when `inps` is empty), builds self.ntm_in from one of
# bow_layer, gru_fact_layer, rnn_fact_layer or inp_proj_layer, runs
# self.ntm.fprop and applies the output layers. Stores self.ntm_outs,
# self.write_weights / self.read_weights, self.probs (Softmax when
# self.softmax, Sigmoid otherwise) and self.h; returns
# (self.probs, self.ntm_outs). leak_rate is passed to Leaky_Rect on the
# use_deepout path.
# NOTE(review): use_noise is forwarded inverted (deterministic=not use_noise,
#   use_noise=not use_noise) and self.ntm.evaluation_mode is set to
#   use_noise -- confirm the intended polarity.
# NOTE(review): `m` and `m1` are bound only under some flag combinations
#   (use_cost_mask; the GRU / simple-RNN branches) but are read under others
#   (amask=m; m1.sum(0) in the use_context branch).
# ---------------------------------------------------------------------------
def fprop(self, inps=None, leak_rate=0.05, use_noise=False, mdl_name=None): self.build_model(use_noise=use_noise, mdl_name=mdl_name) self.ntm.evaluation_mode = use_noise if not inps: inps = self.inps # First two are X and targets # assert (2 + sum([use_mask, use_cmask])) + 1 >= len(inps), \ # "inputs have illegal shape." cmask = None mask = None if isinstance(inps, list): X = inps[0] y = inps[1] if self.use_mask: mask = inps[2] if self.use_cost_mask: cmask = inps[3] else: X = inps['X'] y = inps['y'] if self.use_mask: mask = inps['mask'] if self.use_cost_mask: cmask = inps['cmask'] if self.use_cost_mask: if cmask is not None: if self.use_bow_cost_mask: if mask.ndim == cmask.ndim: m = (mask * TT.eq(cmask, 0)).reshape( (cmask.shape[0] * cmask.shape[1], -1)) else: m = (mask.dimshuffle(0, 1, 'x') * TT.eq(cmask, 0))[:, :, 0].reshape( (cmask.shape[0] * cmask.shape[1], -1)) else: m = mask else: raise ValueError("Mask for the answers should not be empty.") if X.ndim == 2 and y.ndim == 1: # For sequential MNIST. 
if self.permute_order: X = X.dimshuffle(1, 0) idxs = self.rnd_indxs X = X[idxs] inp_shp = (X.shape[0], X.shape[1], -1) else: inp_shp = (X.shape[1], X.shape[2], -1) #import pdb;pdb.set_trace() self.ntm_in = None if self.use_bow_input and not self.use_gru_inp_rep and not self.use_simple_rnn_inp_rep: bow_out = self.bow_layer.fprop(X, amask=m, deterministic=not use_noise) bow_out = bow_out.reshape((X.shape[1], X.shape[2], -1)) self.ntm_in = bow_out elif self.use_gru_inp_rep: m0 = as_floatX(TT.gt(X, 0)) if self.use_mask and self.use_cost_mask: if cmask is not None: m1 = mask * TT.eq(cmask, 0) else: raise ValueError( "Mask for the answers should not be empty.") low_inp_shp = (X.shape[0], X.shape[1] * X.shape[2], -1) Xr = X.reshape(low_inp_shp) grufact_inps = self.gru_fact_layer_inps.fprop(Xr) low_reset_below = grufact_inps.values()[0].reshape(low_inp_shp) low_gater_below = grufact_inps.values()[1].reshape(low_inp_shp) low_state_below = grufact_inps.values()[2].reshape(low_inp_shp) linps = [low_reset_below, low_gater_below, low_state_below] m0_part = TT.cast( m0.sum(0).reshape( (X.shape[1], X.shape[2])).dimshuffle(0, 1, 'x'), 'float32') m0_part = TT.switch(TT.eq(m0_part, as_floatX(0)), as_floatX(1), m0_part) h0 = self.gru_fact_layer.fprop(inps=linps, mask=m0, batch_size=self.batch_size) self.ntm_in = m1.dimshuffle(0, 1, 'x') * ((m0.dimshuffle(0, 1, 2, 'x') * h0.reshape((X.shape[0], X.shape[1], X.shape[2], -1))).sum(0) \ / m0_part).reshape(inp_shp) elif self.use_simple_rnn_inp_rep: m0 = as_floatX(TT.gt(X, 0)) if cmask is not None: m1 = mask * TT.eq(cmask, 0) else: raise ValueError("Mask for the answers should not be empty.") low_inp_shp = (X.shape[0], X.shape[1] * X.shape[2], -1) Xr = X.reshape(low_inp_shp) rnnfact_inps = self.rnn_fact_layer_inps.fprop(Xr).reshape( low_inp_shp) m0 = m0.reshape(low_inp_shp) h0 = self.rnn_fact_layer.fprop(inps=rnnfact_inps, mask=m0, batch_size=self.batch_size) m0_part = TT.cast( m0.sum(0).reshape( (X.shape[1], X.shape[2])).dimshuffle(0, 1, 
'x'), 'float32') m0_part = TT.switch(m0_part == 0, as_floatX(1), m0_part) self.ntm_in = m1.dimshuffle(0, 1, 'x') * (h0.reshape((X.shape[0], X.shape[1], X.shape[2], -1)).sum(0) / \ m0_part).reshape(inp_shp) else: X_proj = self.inp_proj_layer.fprop(X) if not self.learn_embeds: X_proj = block_gradient(X_proj) if self.use_batch_norm: X_proj = self.batch_norm_layer.fprop(X_proj, inference=not use_noise) self.ntm_in = X_proj context = None if self.use_context: if self.use_qmask: context = (self.qmask.dimshuffle(0, 1, 'x') * self.ntm_in).sum(0) else: m1_part = m1.sum(0).dimshuffle(0, 'x') context = self.ntm_in.sum(0) / m1_part self.ntm_outs = self.ntm.fprop(self.ntm_in, mask=mask, cmask=cmask, context=context, batch_size=self.batch_size, use_mask=self.use_mask, use_noise=not use_noise) h, m_read = self.ntm_outs[0], self.ntm_outs[2] if self.use_reinforce: self.w_samples, self.r_samples = self.ntm_outs[-2], self.ntm_outs[ -1] if self.smoothed_diff_weights: idx = -6 else: idx = -4 self.write_weights, self.read_weights = self.ntm_outs[idx], \ self.ntm_outs[idx+1] else: self.write_weights, self.read_weights = self.ntm_outs[ 3], self.ntm_outs[4] if self.anticorrelation: acorr = AntiCorrelationConstraint(level=self.anticorrelation) rw1 = self.read_weights[:, 0] rw2 = self.read_weights[:, 1] self.reg += acorr(rw1, rw2, mask=mask) if self.correlation_ws: logger.info("Applying the correlation constraint.") corr_cons = CorrelationConstraint(level=self.correlation_ws) self.reg += corr_cons(self.read_weights, self.write_weights, mask, self.qmask) if self.use_last_hidden_state: h = h.reshape(inp_shp) h = h[-1] if self.use_deepout: merged_out = self.merge_layer.fprop([h, m_read]) out_layer = Leaky_Rect(merged_out, leak_rate) if self.dropout: dropOp = Dropout(dropout_prob=self.dropout) out_layer = dropOp(out_layer, deterministic=not use_noise) out_layer = self.out_layer.fprop(out_layer, deterministic=not use_noise) else: if self.use_out_mem: if self.dropout: dropOp = 
Dropout(dropout_prob=self.dropout) m_read = dropOp(m_read, deterministic=not use_noise) mem_out = self.out_mem.fprop(m_read, deterministic=not use_noise) mem_scaler = self.out_scaler.fprop( h, deterministic=not use_noise).reshape( (mem_out.shape[0], )).dimshuffle(0, 'x') h_out = self.out_layer.fprop(h, deterministic=not use_noise) out_layer = h_out + mem_out * Sigmoid(mem_scaler) else: if self.dropout: dropOp = Dropout(dropout_prob=self.dropout) h = dropOp(h, deterministic=not use_noise) out_layer = self.out_layer.fprop(h, deterministic=not use_noise) if self.predict_bow_out and self.bow_out_layer: logger.info("Using the bow output prediction.") self.bow_pred_out = Sigmoid( self.bow_out_layer.fprop(h, deterministic=not use_noise)) if self.softmax: self.probs = Softmax(out_layer) else: self.probs = Sigmoid(out_layer) if self.ntm.updates: self.updates.update(self.ntm.updates) self.str_params(logger) self.h = h return self.probs, self.ntm_outs
# ---------------------------------------------------------------------------
# __get_state__() returns self.state.
# NOTE(review): pickle / copy look up `__getstate__` and `__setstate__` (no
#   inner underscore); as spelled, these two are ordinary methods that must
#   be called explicitly -- confirm that is intended.
# ---------------------------------------------------------------------------
def __get_state__(self): return self.state
# __set_state__(state) merges the given mapping into self.__dict__.
def __set_state__(self, state): self.__dict__.update(state)
def process_muscle_stimulus(ms):
    """Forward one muscle-stimulus pair to the environment.

    ``ms[0]`` is passed to ``env.contract_anterior`` and ``ms[1]`` to
    ``env.contract_posterior``.
    """
    env.contract_anterior(ms[0])
    env.contract_posterior(ms[1])


# The handler above is defined *before* any stream is subscribed and before
# the timer starts ticking: previously it was defined after Timer().start(),
# so a stimulus emitted early (on subscription or on the first tick) would
# have hit a NameError inside the subscriber.
rpg.output_invert_movement.subscribe(lambda i: cpg.input_flip.on_next(i))
cpg.output_muscle_stimuli.subscribe(lambda ms: process_muscle_stimulus(ms))

# ========== configure logging ==========
log_path = "../log/demo.txt" if demo_mode else "../log/logfile.txt"
logging = Logging(log_path, 10)
logging.add_logger(Logging.Logger("vis_feedback", env.visual_feedback_label))
logging.add_logger(Logging.Logger("prediction", vis.output_prediction_label))
logging.add_logger(Logging.Logger("danger", eval.output_danger_level))
logging.add_logger(Logging.Logger("opportunity", eval.output_opportunity_level))
logging.add_logger(Logging.Logger("invert", rpg.output_invert_movement))
# The muscle stimulus stream carries pairs; log each component separately.
logging.add_logger(Logging.Logger(
    "muscle_f_ant", cpg.output_muscle_stimuli.pipe(ops.map(lambda x: x[0]))))
logging.add_logger(Logging.Logger(
    "muscle_f_post", cpg.output_muscle_stimuli.pipe(ops.map(lambda x: x[1]))))
logging.start_logging()

# ========= start timer ticks ===========
Timer().start()

# prevent termination of streams
while True:
    time.sleep(10)
def __init__(self, n_in, n_hids, n_out, mem_size, mem_nel, deep_out_size,
             bow_size=40, inps=None, dropout=None, predict_bow_out=False,
             seq_len=None, n_read_heads=1, n_layers=1, n_write_heads=1,
             train_profile=False, erase_activ=None, content_activ=None,
             l1_pen=None, l2_pen=None, use_reinforce=False,
             use_reinforce_baseline=False, n_reading_steps=2,
             use_gru_inp_rep=False, use_simple_rnn_inp_rep=False,
             use_nogru_mem2q=False, sub_mb_size=40, lambda1_rein=2e-4,
             lambda2_rein=2e-5, baseline_reg=1e-2, anticorrelation=None,
             use_layer_norm=False, recurrent_dropout_prob=-1,
             correlation_ws=None, hybrid_att=True, max_fact_len=7,
             use_dice_val=False, use_qmask=False, renormalization_scale=4.8,
             w2v_embed_scale=0.42, emb_scale=0.32, use_soft_att=False,
             use_hard_att_eval=False, use_batch_norm=False,
             learning_rule=None, use_loc_based_addressing=True,
             smoothed_diff_weights=False, use_multiscale_shifts=True,
             use_ff_controller=False, use_gate_quad_interactions=False,
             permute_order=False, wpenalty=None, noise=None,
             w2v_embed_path=None, glove_embed_path=None, learn_embeds=True,
             use_last_hidden_state=False, use_adv_indexing=False,
             use_bow_input=True, use_out_mem=True, use_deepout=True,
             use_q_mask=False, use_inp_content=True, rnd_indxs=None,
             address_size=0, learn_h0=False, use_context=False, debug=False,
             controller_activ=None, mem_gater_activ=None,
             weight_initializer=None, bias_initializer=None,
             use_cost_mask=True, use_bow_cost_mask=True,
             theano_function_mode=None, batch_size=32, use_noise=False,
             reinforce_decay=0.9, softmax=False, use_mask=False,
             name="ntm_model", **kwargs):
    """Record the model configuration and create its symbolic helpers.

    Almost every argument is copied onto the instance under its own name
    (``lambda1_rein`` / ``lambda2_rein`` are stored as ``lambda1_reinf`` /
    ``lambda2_reinf``).  In addition this constructor:

    * snapshots the current locals (arguments and ``self``) in
      ``self.state``;
    * builds ``self.l2_penalizer`` when ``l2_pen`` is truthy;
    * builds ``self.Reinforce`` and ``self.ReaderReinforce`` when
      ``use_reinforce`` is set;
    * creates a shared ``self.dice_val`` when ``use_dice_val`` is set;
    * creates three ``Timer`` objects, calls ``self.reset()``, replaces
      ``self.seq_len`` by a symbolic integer scalar and converts the inputs
      through ``__convert_inps_to_list``;
    * sets ``self.bow_out_w`` to a symbolic float scalar when
      ``predict_bow_out`` is set and to ``0`` otherwise.

    ``sub_mb_size=None`` falls back to ``batch_size``.  The ``seq_len``
    argument and ``**kwargs`` are not read here apart from ending up in
    ``self.state``.

    Raises:
        AssertionError: if ``deep_out_size`` is None or ``sub_mb_size``
            exceeds ``batch_size``.
        ValueError: if ``anticorrelation`` is set with fewer than two read
            heads, if ``predict_bow_out`` is set with four or fewer inputs,
            or if ``bow_size`` / ``name`` is None.
    """
    assert deep_out_size is not None, ("Size of the deep output "
                                       " should not be None.")
    sub_mb_size = batch_size if sub_mb_size is None else sub_mb_size
    assert sub_mb_size <= batch_size, "batch_size should be greater than sub_mb_size"

    self.hybrid_att = hybrid_att
    # Must stay here: locals() is captured before any helper local exists.
    self.state = locals()

    # --- masks, regularisation and embedding options ---
    self.use_context = use_context
    self.use_mask = use_mask
    self.use_q_mask = use_q_mask
    self.eps = 1e-8
    self.l1_pen = l1_pen
    self.l2_pen = l2_pen
    self.l2_penalizer = None
    self.exclude_params = {}
    self.emb_scale = emb_scale
    self.w2v_embed_path = w2v_embed_path
    self.glove_embed_path = glove_embed_path
    self.learn_embeds = learn_embeds

    # --- REINFORCE configuration ---
    self.use_reinforce = use_reinforce
    self.use_reinforce_baseline = use_reinforce_baseline
    self.reinforce_decay = reinforce_decay
    self.lambda1_reinf = lambda1_rein
    self.lambda2_reinf = lambda2_rein

    # --- input representation and attention options ---
    self.use_gru_inp_rep = use_gru_inp_rep
    self.use_simple_rnn_inp_rep = use_simple_rnn_inp_rep
    self.use_inp_content = use_inp_content
    self.use_gate_quad_interactions = use_gate_quad_interactions
    self.use_layer_norm = use_layer_norm
    self.use_soft_att = use_soft_att
    self.use_hard_att_eval = use_hard_att_eval
    self.smoothed_diff_weights = smoothed_diff_weights
    self.correlation_ws = correlation_ws
    self.recurrent_dropout_prob = recurrent_dropout_prob
    self.max_fact_len = max_fact_len
    self.rnd_indxs = rnd_indxs
    self.n_reading_steps = n_reading_steps
    self.sub_mb_size = sub_mb_size
    self.predict_bow_out = predict_bow_out

    if anticorrelation and n_read_heads < 2:
        raise ValueError("Anti-correlation of the attention weight"
                         " do not support the multiple read heads.")
    self.anticorrelation = anticorrelation

    if self.predict_bow_out and len(inps) <= 4:
        raise ValueError(
            "The number of inputs should be greater than 4.")

    if l2_pen:
        self.l2_penalizer = L2Penalty(self.l2_pen)

    # (A mutual-exclusion check between the BOW / GRU / simple-RNN input
    # options was disabled in the original code and stays disabled.)

    # --- scales, inputs and layer options ---
    self.inps = inps
    self.n_in = n_in
    self.bow_size = bow_size
    self.ntm_outs = None
    self.renormalization_scale = renormalization_scale
    self.w2v_embed_scale = w2v_embed_scale
    self.baseline_reg = baseline_reg
    self.erase_activ = erase_activ
    self.content_activ = content_activ
    self.dropout = dropout
    self.wpenalty = wpenalty
    self.noise = noise
    self.theano_function_mode = theano_function_mode
    self.train_profile = train_profile
    self.permute_order = permute_order
    self.use_ff_controller = use_ff_controller
    self.use_bow_cost_mask = use_bow_cost_mask
    self.use_last_hidden_state = use_last_hidden_state
    self.use_loc_based_addressing = use_loc_based_addressing
    self.use_nogru_mem2q = use_nogru_mem2q
    self.use_qmask = use_qmask
    self.use_batch_norm = use_batch_norm
    # Use this if you have a ff-controller because otherwise this is not
    # effective:
    self.n_layers = n_layers

    if self.use_reinforce:
        reinforceCls = (REINFORCE if self.use_reinforce_baseline
                        else REINFORCEBaselineExt)
        self.Reinforce = reinforceCls(lambda1_reg=self.lambda1_reinf,
                                      lambda2_reg=self.lambda2_reinf,
                                      decay=self.reinforce_decay)
        self.ReaderReinforce = ReinforcePenalty(
            reinf_level=self.lambda1_reinf,
            maxent_level=self.lambda2_reinf,
            use_reinforce_baseline=self.use_reinforce_baseline)

    self.dice_val = sharedX(1.) if use_dice_val else None
    self.use_dice_val = use_dice_val

    if bow_size is None:
        raise ValueError("bow_size should be specified.")
    if name is None:
        raise ValueError("name should not be empty.")

    # --- sizes, heads and initialisers ---
    self.n_hids = n_hids
    self.n_out = n_out
    self.mem_size = mem_size
    self.mem_nel = mem_nel
    self.address_size = address_size
    self.deep_out_size = deep_out_size
    self.n_read_heads = n_read_heads
    self.n_write_heads = n_write_heads
    self.use_deepout = use_deepout
    self.use_out_mem = use_out_mem
    self.use_multiscale_shifts = use_multiscale_shifts
    self.use_adv_indexing = use_adv_indexing
    self.use_bow_input = use_bow_input
    self.use_cost_mask = use_cost_mask
    self.learn_h0 = learn_h0
    self.softmax = softmax
    self.controller_activ = controller_activ
    self.mem_gater_activ = mem_gater_activ
    self.weight_initializer = weight_initializer
    self.bias_initializer = bias_initializer

    # A falsy batch_size means "take it from the first input's shape".
    self.batch_size = batch_size if batch_size else inps[0].shape[1]

    self.comp_grad_fn = None
    self.name = name
    self.use_noise = use_noise
    self.train_timer = Timer("Training function")
    self.gradfn_timer = Timer("Gradient function")
    self.grads_timer = Timer("Computing the grads")
    self.reset()

    self.seq_len = TT.iscalar('seq_len')
    self.__convert_inps_to_list()

    if debug:
        # The test value comes from axis 1 of the first input for the
        # GRU / BOW input representations and from axis 0 otherwise.
        len_axis = 1 if (self.use_gru_inp_rep or self.use_bow_input) else 0
        self.seq_len.tag.test_value = \
            self.inps[0].tag.test_value.shape[len_axis]

    self.learning_rule = learning_rule

    if not self.predict_bow_out:
        self.bow_out_w = 0
    else:
        self.bow_out_w = TT.fscalar("bow_out_w")
        if debug:
            self.bow_out_w.tag.test_value = np.float32(1.0)
class TestMeter(object):
    """Collect and report error statistics while evaluating on a test set.

    Keeps windowed per-minibatch top-1 / top-5 errors, running totals of
    misclassified examples, and the lowest epoch-level errors seen so far.
    """

    def __init__(self, max_iter):
        """Create the timer and error trackers for ``max_iter`` iterations."""
        self.max_iter = max_iter
        self.iter_timer = Timer()
        # Per-minibatch errors, smoothed over a cfg.LOG_PERIOD-sized window.
        self.mb_top1_err = ScalarMeter(cfg.LOG_PERIOD)
        self.mb_top5_err = ScalarMeter(cfg.LOG_PERIOD)
        # Lowest full-test-set errors observed so far.
        self.min_top1_err = self.min_top5_err = 100.0
        # Running totals of misclassified examples and of examples seen.
        self.num_top1_mis = self.num_top5_mis = self.num_samples = 0

    def reset(self, min_errs=False):
        """Clear the timer, windows and totals; also the minima if asked."""
        if min_errs:
            self.min_top1_err = self.min_top5_err = 100.0
        for tracker in (self.iter_timer, self.mb_top1_err, self.mb_top5_err):
            tracker.reset()
        self.num_top1_mis = self.num_top5_mis = self.num_samples = 0

    def iter_tic(self):
        """Start timing one iteration."""
        self.iter_timer.tic()

    def iter_toc(self):
        """Stop timing the current iteration."""
        self.iter_timer.toc()

    def update_stats(self, top1_err, top5_err, mb_size):
        """Record the errors of one minibatch of ``mb_size`` examples."""
        recorded = ((self.mb_top1_err, top1_err), (self.mb_top5_err, top5_err))
        for window, err in recorded:
            window.add_value(err)
        # Errors are weighted by the minibatch size before accumulating.
        self.num_top1_mis += top1_err * mb_size
        self.num_top5_mis += top5_err * mb_size
        self.num_samples += mb_size

    def get_iter_stats(self, cur_epoch, cur_iter):
        """Return the per-iteration statistics as a dict."""
        mem_usage = gpu_mem_usage()
        return {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "iter": "{}/{}".format(cur_iter + 1, self.max_iter),
            "time_avg": self.iter_timer.average_time,
            "time_diff": self.iter_timer.diff,
            "top1_err": self.mb_top1_err.get_win_median(),
            "top5_err": self.mb_top5_err.get_win_median(),
            "mem": int(np.ceil(mem_usage)),
        }

    def log_iter_stats(self, cur_epoch, cur_iter):
        """Log the iteration statistics once every cfg.LOG_PERIOD iterations."""
        if (cur_iter + 1) % cfg.LOG_PERIOD == 0:
            iter_stats = self.get_iter_stats(cur_epoch, cur_iter)
            logger.info(logging.dump_log_data(iter_stats, "test_iter"))

    def get_epoch_stats(self, cur_epoch):
        """Return the epoch statistics and update the running minima."""
        seen = self.num_samples
        top1_err = self.num_top1_mis / seen
        top5_err = self.num_top5_mis / seen
        self.min_top1_err = min(self.min_top1_err, top1_err)
        self.min_top5_err = min(self.min_top5_err, top5_err)
        mem_usage = gpu_mem_usage()
        return {
            "epoch": "{}/{}".format(cur_epoch + 1, cfg.OPTIM.MAX_EPOCH),
            "time_avg": self.iter_timer.average_time,
            "top1_err": top1_err,
            "top5_err": top5_err,
            "min_top1_err": self.min_top1_err,
            "min_top5_err": self.min_top5_err,
            "mem": int(np.ceil(mem_usage)),
        }

    def log_epoch_stats(self, cur_epoch):
        """Log the epoch statistics."""
        epoch_stats = self.get_epoch_stats(cur_epoch)
        logger.info(logging.dump_log_data(epoch_stats, "test_epoch"))