def __call__(self):
    # For slow iterators: keep the previous batch around and fall back to it
    # when the iterator returns an empty batch
    while True:
        batch = self.data.next()
        if batch:
            self.prev_batch = batch
            break
        else:
            if self.prev_batch:
                batch = self.prev_batch
                break

    # Perturb the data (! and the model)
    if isinstance(batch, dict):
        batch = self.model.perturb(**batch)
    else:
        batch = self.model.perturb(*batch)
    # Load the dataset into the GPU
    # Note: not the most efficient approach in general, as each batch
    # is copied to the GPU individually
    if isinstance(batch, dict):
        for gdata in self.gdata:
            gdata.set_value(batch[gdata.name], borrow=True)
    else:
        for gdata, data in zip(self.gdata, batch):
            gdata.set_value(data, borrow=True)
    # Run the training function
    g_st = time.time()
    rvals = self.train_fn()
    for schedule in self.schedules:
        schedule(self, rvals[-1])
    self.update_fn()
    g_ed = time.time()
    self.state["lr"] = float(self.lr)
    cost = rvals[-1]
    self.old_cost = cost
    whole_time = time.time() - self.step_timer
    if self.step % self.state["trainFreq"] == 0:
        msg = ".. iter %4d cost %.3f"
        vals = [self.step, cost]
        for dx, prop in enumerate(self.prop_names):
            msg += " " + prop + " %.2e"
            vals += [float(numpy.array(rvals[dx]))]
        msg += " step time %s whole time %s lr %.2e"
        vals += [print_time(g_ed - g_st),
                 print_time(time.time() - self.step_timer),
                 float(self.lr)]
        print msg % tuple(vals)
    self.step += 1
    ret = dict(
        [
            ("cost", float(cost)),
            ("error", float(cost)),
            ("lr", float(self.lr)),
            ("time_step", float(g_ed - g_st)),
            ("whole_time", float(whole_time)),
        ] + zip(self.prop_names, rvals)
    )
    return ret
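# NOTE: `print_time` (and `print_mem`, used in the validation methods below) are
# small logging helpers assumed to be imported from elsewhere in the repository.
# A minimal sketch of what a `print_time`-style formatter could look like
# (hypothetical illustration, not the actual helper used above):
def _print_time_sketch(seconds):
    """Format an elapsed time in seconds as a short human-readable string."""
    seconds = int(seconds)
    hours, rest = divmod(seconds, 3600)
    minutes, secs = divmod(rest, 60)
    if hours:
        return '%dh%02dm%02ds' % (hours, minutes, secs)
    if minutes:
        return '%dm%02ds' % (minutes, secs)
    return '%ds' % secs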
def __call__(self):
    df_st = time.time()
    batch = self.data.next()
    df_et = time.time()
    assert batch

    # Perturb the data (! and the model)
    if isinstance(batch, dict):
        batch = self.model.perturb(**batch)
    else:
        batch = self.model.perturb(*batch)
    # Load the dataset into the GPU
    # Note: not the most efficient approach in general, as each batch
    # is copied to the GPU individually
    if isinstance(batch, dict):
        for gdata in self.gdata:
            gdata.set_value(batch[gdata.name], borrow=True)
    else:
        for gdata, data in zip(self.gdata, batch):
            gdata.set_value(data, borrow=True)
    # Run the training function
    g_st = time.time()
    lr_val = self.lr.get_value()
    cutoff_val = self.cutoff.get_value()
    rvals = self.train_fn()
    for schedule in self.schedules:
        schedule(self, rvals[-1])
    self.update_fn()
    g_ed = time.time()
    self.state['lr'] = lr_val
    cost = rvals[-1]
    self.old_cost = cost
    whole_time = time.time() - self.step_timer
    if self.step % self.state['trainFreq'] == 0:
        msg = '.. iter %4d cost %.3f'
        vals = [self.step, cost]
        for dx, prop in enumerate(self.prop_names):
            msg += ' ' + prop + ' %.2e'
            vals += [float(numpy.array(rvals[dx]))]
        msg += ' dload %s step time %s whole time %s lr %.2e co %.2e'
        vals += [print_time(df_et - df_st),
                 print_time(g_ed - g_st),
                 print_time(time.time() - self.step_timer),
                 lr_val,
                 cutoff_val]
        print msg % tuple(vals)
    self.step += 1
    if self.state['cutoff_adapt']:
        print 'gn_log_ave ', self.gnorm_log_ave.get_value(), \
            ' gn_log2_ave ', self.gnorm_log2_ave.get_value(), \
            ' gs ', self.cutoff_adapt_steps.get_value()
    ret = dict([('cost', float(cost)),
                ('error', float(cost)),
                ('lr', lr_val),
                ('cutoff', cutoff_val),
                ('time_step', float(g_ed - g_st)),
                ('whole_time', float(whole_time))] + zip(self.prop_names, rvals))
    return ret
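# NOTE: when state['cutoff_adapt'] is set, the optimizer is expected to maintain
# running averages of log(gradient norm) and log(gradient norm)^2 in the shared
# variables `gnorm_log_ave` / `gnorm_log2_ave` logged above, from which the
# clipping cutoff is adapted. A minimal numpy sketch of one plausible way such a
# threshold could be derived from those statistics (illustrative only; the real
# update lives inside the compiled Theano training function and may differ):
import numpy

def _adaptive_cutoff_sketch(gnorm, log_ave, log2_ave, steps, rho=0.99, n_std=2.0):
    """Update running stats of log ||g|| and return a clipping threshold."""
    log_g = numpy.log(max(float(gnorm), 1e-8))
    log_ave = rho * log_ave + (1.0 - rho) * log_g
    log2_ave = rho * log2_ave + (1.0 - rho) * log_g ** 2
    steps += 1
    std = numpy.sqrt(max(log2_ave - log_ave ** 2, 0.0))
    cutoff = numpy.exp(log_ave + n_std * std)
    return cutoff, log_ave, log2_ave, steps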
def __call__(self):
    # For slow iterators: keep the previous batch around and fall back to it
    # when the iterator returns an empty batch
    while True:
        batch = self.data.next()
        if batch:
            self.prev_batch = batch
            break
        else:
            if self.prev_batch:
                batch = self.prev_batch
                break

    # Perturb the data (! and the model)
    if isinstance(batch, dict):
        batch = self.model.perturb(**batch)
    else:
        batch = self.model.perturb(*batch)
    # Load the dataset into the GPU
    # Note: not the most efficient approach in general, as each batch
    # is copied to the GPU individually
    if isinstance(batch, dict):
        for gdata in self.gdata:
            gdata.set_value(batch[gdata.name], borrow=True)
    else:
        for gdata, data in zip(self.gdata, batch):
            gdata.set_value(data, borrow=True)
    # Run the training function
    g_st = time.time()
    rvals = self.train_fn()
    for schedule in self.schedules:
        schedule(self, rvals[-1])
    self.update_fn()
    g_ed = time.time()
    self.state['lr'] = float(self.lr)
    cost = rvals[-1]
    self.old_cost = cost
    whole_time = time.time() - self.step_timer
    if self.step % self.state['trainFreq'] == 0:
        msg = '.. iter %4d cost %.3f'
        vals = [self.step, cost]
        for dx, prop in enumerate(self.prop_names):
            msg += ' ' + prop + ' %.2e'
            vals += [float(numpy.array(rvals[dx]))]
        msg += ' step time %s whole time %s lr %.2e'
        vals += [print_time(g_ed - g_st),
                 print_time(time.time() - self.step_timer),
                 float(self.lr)]
        print msg % tuple(vals)
    self.step += 1
    ret = dict([('cost', float(cost)),
                ('error', float(cost)),
                ('lr', float(self.lr)),
                ('time_step', float(g_ed - g_st)),
                ('whole_time', float(whole_time))] + zip(self.prop_names, rvals))
    return ret
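# NOTE: each entry in `self.schedules` is invoked as `schedule(trainer, cost)`
# after every training step. A minimal sketch of one such callable, a
# hypothetical halve-the-learning-rate-on-blowup rule (not one of the schedules
# shipped with the codebase), assuming `trainer.lr` is a Theano shared scalar as
# in the variant above that calls `self.lr.get_value()`:
class _HalveLROnBlowupSketch(object):
    """Halve the shared learning rate whenever the cost explodes."""

    def __init__(self, factor=10.0):
        self.factor = factor

    def __call__(self, trainer, cost):
        # `trainer.old_cost` is set by __call__ above after each step.
        if trainer.old_cost and cost > self.factor * trainer.old_cost:
            trainer.lr.set_value(trainer.lr.get_value() * 0.5)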
def __call__(self):
    batch = self.data.next()
    assert batch

    # Perturb the data (! and the model)
    if isinstance(batch, dict):
        batch = self.model.perturb(**batch)
    else:
        batch = self.model.perturb(*batch)
    # Load the dataset into the GPU
    # Note: not the most efficient approach in general, as each batch
    # is copied to the GPU individually (only suitable for this mode)
    if isinstance(batch, dict):
        for gdata in self.gdata:
            gdata.set_value(batch[gdata.name], borrow=True)
    else:
        for gdata, data in zip(self.gdata, batch):
            gdata.set_value(data, borrow=True)
    # Run the training function
    g_st = time.time()
    rvals = self.train_fn()
    ############################################################
    # exported_grad = self.export_grad_fn()
    # print exported_grad
    ############################################################
    for schedule in self.schedules:
        schedule(self, rvals[-1])
    self.update_fn()
    g_ed = time.time()
    self.state['lr'] = float(self.lr)
    cost = rvals[-1]
    self.old_cost = cost
    whole_time = time.time() - self.step_timer
    if self.step % self.state['trainFreq'] == 0:
        msg = '.. iter %4d cost %.3f'
        vals = [self.step, cost]
        for dx, prop in enumerate(self.prop_names):
            msg += ' ' + prop + ' %.2e'
            vals += [float(numpy.array(rvals[dx]))]
        msg += ' step time %s whole time %s lr %.2e'
        vals += [print_time(g_ed - g_st),
                 print_time(time.time() - self.step_timer),
                 float(self.lr)]
        print msg % tuple(vals)
    self.step += 1
    ret = dict([('cost', float(cost)),
                ('error', float(cost)),
                ('lr', float(self.lr)),
                ('time_step', float(g_ed - g_st)),
                ('whole_time', float(whole_time))] + zip(self.prop_names, rvals))
    return ret
def validate(self):
    rvals = self.model.validate(self.valid_data)
    msg = '** %d validation:' % self.valid_id
    print_mem('validate')
    self.valid_id += 1
    self.batch_start_time = time.time()
    pos = self.step // self.state['validFreq']
    for k, v in rvals:
        msg = msg + ' ' + k + ':%f ' % float(v)
        self.timings['valid' + k][pos] = float(v)
        self.state['valid' + k] = float(v)
    msg += 'whole time %s' % print_time(time.time() - self.start_time)
    msg += ' patience %d' % self.patience
    print msg

    if self.train_cost:
        valid_rvals = rvals
        rvals = self.model.validate(self.train_data, True)
        msg = '** %d train:' % (self.valid_id - 1)
        for k, v in rvals:
            msg = msg + ' ' + k + ':%6.3f ' % float(v)
            self.timings['fulltrain' + k] = float(v)
            self.state['fulltrain' + k] = float(v)
        print msg
        rvals = valid_rvals

    self.state['validtime'] = float(time.time() - self.start_time) / 60.
    # Just pick the first thing that the cost returns
    cost = rvals[0][1]
    if self.state['bvalidcost'] > cost:
        self.state['bvalidcost'] = float(cost)
        for k, v in rvals:
            self.state['bvalid' + k] = float(v)
        self.state['bstep'] = int(self.step)
        self.state['btime'] = int(time.time() - self.start_time)
        self.test()
    elif numpy.random.rand(1) > self.state['rand_test_inclusion']:
        print "Shouldn't test, but you got lucky", cost, '>', self.state['bvalidcost']
        for k, v in self.state.items():
            if 'test' in k:
                print k, v
        self.test()
    else:
        print 'No testing', cost, '>', self.state['bvalidcost']
        for k, v in self.state.items():
            if 'test' in k:
                print k, v
    print_mem('validate')
    if self.validate_postprocess:
        return self.validate_postprocess(cost)
    return cost
def validate(self):
    rvals = self.model.validate(self.valid_data)
    msg = "** %d validation:" % self.valid_id
    self.valid_id += 1
    self.batch_start_time = time.time()
    pos = self.step // self.state["validFreq"]
    for k, v in rvals:
        msg = msg + " " + k + ":%f " % float(v)
        self.timings["valid" + k][pos] = float(v)
        self.state["valid" + k] = float(v)
    msg += "whole time %s" % print_time(time.time() - self.start_time)
    msg += " patience %d" % self.patience
    print msg

    if self.train_cost:
        valid_rvals = rvals
        rvals = self.model.validate(self.train_data, True)
        msg = "** %d train:" % (self.valid_id - 1)
        for k, v in rvals:
            msg = msg + " " + k + ":%6.3f " % float(v)
            self.timings["fulltrain" + k] = float(v)
            self.state["fulltrain" + k] = float(v)
        print msg
        rvals = valid_rvals

    self.state["validtime"] = float(time.time() - self.start_time) / 60.0
    # Just pick the first thing that the cost returns
    cost = rvals[0][1]
    if self.state["bvalidcost"] > cost:
        self.state["bvalidcost"] = float(cost)
        for k, v in rvals:
            self.state["bvalid" + k] = float(v)
        self.state["bstep"] = int(self.step)
        self.state["btime"] = int(time.time() - self.start_time)
        self.test()
    else:
        print "No testing", cost, ">", self.state["bvalidcost"]
        for k, v in self.state.items():
            if "test" in k:
                print k, v
    print_mem("validate")
    if self.validate_postprocess:
        return self.validate_postprocess(cost)
    return cost
def validate(self):
    rvals = self.model.validate(self.valid_data)
    msg = '** %d validation:' % self.valid_id
    self.valid_id += 1
    self.batch_start_time = time.time()
    pos = self.step // self.state['validFreq']
    for k, v in rvals:
        msg = msg + ' ' + k + ':%f ' % float(v)
        self.timings['valid' + k][pos] = float(v)
        self.state['valid' + k] = float(v)
    msg += 'whole time %s' % print_time(time.time() - self.start_time)
    msg += ' patience %d' % self.patience
    print msg

    if self.train_cost:
        valid_rvals = rvals
        rvals = self.model.validate(self.train_data, True)
        msg = '** %d train:' % (self.valid_id - 1)
        for k, v in rvals:
            msg = msg + ' ' + k + ':%6.3f ' % float(v)
            self.timings['fulltrain' + k] = float(v)
            self.state['fulltrain' + k] = float(v)
        print msg
        rvals = valid_rvals

    self.state['validtime'] = float(time.time() - self.start_time) / 60.
    # Just pick the first thing that the cost returns
    cost = rvals[0][1]
    if self.state['bvalidcost'] > cost:
        self.state['bvalidcost'] = float(cost)
        for k, v in rvals:
            self.state['bvalid' + k] = float(v)
        self.state['bstep'] = int(self.step)
        self.state['btime'] = int(time.time() - self.start_time)
        self.test()
    else:
        print 'No testing', cost, '>', self.state['bvalidcost']
        for k, v in self.state.items():
            if 'test' in k:
                print k, v
    print_mem('validate')
    if self.validate_postprocess:
        return self.validate_postprocess(cost)
    return cost
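# NOTE: `validate_postprocess`, when set, is an arbitrary callable applied to
# the validation cost before it is handed back to the main loop. A minimal
# sketch of one possible postprocess (hypothetical: converting a per-word
# negative log-likelihood in nats into a perplexity), not the callable actually
# configured in any given experiment:
import numpy

def _nll_to_perplexity_sketch(cost):
    """Interpret `cost` as a mean NLL in nats and return the perplexity."""
    return float(numpy.exp(cost))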
def __call__(self):
    batch = self.data.next()
    assert batch

    # Perturb the data (! and the model)
    if isinstance(batch, dict):
        batch = self.model.perturb(**batch)
    else:
        batch = self.model.perturb(*batch)
    # Load the dataset into the GPU
    # Note: not the most efficient approach in general, as each batch
    # is copied to the GPU individually

    # Sample candidate translations from each system, deduplicate them,
    # and pad them into target matrices plus masks
    sampleN = self.state['sampleN']
    myL = int(1.5 * len(batch['y']))
    xi = []
    for i in xrange(self.state['num_systems']):
        xi.append(batch['x' + str(i)].squeeze())
    samples, probs = self.sampler(sampleN, myL, 1, *xi)
    # samples, probs = self.sampler(sampleN, myL, 1, batch['x'].squeeze())
    y, b = getUnique(samples, batch['y'], self.state)
    b = numpy.array(b, dtype='float32')
    # p = probs.sum(axis=0)
    # p = [math.exp(-i) for i in p]
    # p = [i / sum(p) for i in p]
    # print p
    # print b.mean()
    # print (b * p).mean()
    Y, YM = getYM(y, self.state)
    # print b
    # print Y
    # print YM
    diffN = len(b)
    # Broadcast each source (and its mask) across the sampled candidates
    for i in xrange(self.state['num_systems']):
        X = numpy.zeros((batch['x' + str(i)].shape[0], diffN), dtype='int64')
        batch['x' + str(i)] = batch['x' + str(i)] + X
        X = numpy.zeros((batch['x' + str(i)].shape[0], diffN), dtype='float32')
        batch['x_mask' + str(i)] = batch['x_mask' + str(i)] + X
    batch['y'] = Y
    batch['y_mask'] = YM
    batch['b'] = b
    # if not hasattr(self, 'Last'):
    #     self.Last = True
    #     self.lastbatch = batch
    # else:
    #     if self.Last:
    #         batch = self.lastbatch
    #         self.Last = False
    #     else:
    #         self.lastbatch = batch
    #         self.Last = True
    # print batch['y']
    if isinstance(batch, dict):
        for gdata in self.gdata:
            gdata.set_value(batch[gdata.name], borrow=True)
    else:
        for gdata, data in zip(self.gdata, batch):
            gdata.set_value(data, borrow=True)
    # Run the training function
    g_st = time.time()
    rvals = self.train_fn()
    ############################################################
    # exported_grad = self.export_grad_fn()
    # print exported_grad
    ############################################################
    for schedule in self.schedules:
        schedule(self, rvals[-1])
    self.update_fn()
    g_ed = time.time()
    self.state['lr'] = float(self.lr)
    cost = rvals[-1]
    self.old_cost = cost
    whole_time = time.time() - self.step_timer
    if self.step % self.state['trainFreq'] == 0:
        msg = '.. iter %4d cost %.3f'
        vals = [self.step, cost]
        for dx, prop in enumerate(self.prop_names):
            msg += ' ' + prop + ' %.2e'
            vals += [float(numpy.array(rvals[dx]))]
        msg += ' step time %s whole time %s lr %.2e'
        vals += [print_time(g_ed - g_st),
                 print_time(time.time() - self.step_timer),
                 float(self.lr)]
        print msg % tuple(vals)
    self.step += 1
    ret = dict([('cost', float(cost)),
                ('error', float(cost)),
                ('lr', float(self.lr)),
                ('time_step', float(g_ed - g_st)),
                ('whole_time', float(whole_time))] + zip(self.prop_names, rvals))
    return ret
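# NOTE: `getUnique` and `getYM` are helpers assumed to be defined elsewhere in
# the repository (they deduplicate sampled translations and pad them into a
# target matrix plus mask). A minimal numpy sketch of what a `getYM`-style
# padding helper could look like (hypothetical, for illustration only):
import numpy

def _pad_targets_sketch(sequences, null_sym=0):
    """Pad a list of token-id sequences into time-major (Y, Y_mask) arrays."""
    n_seqs = len(sequences)
    max_len = max(len(s) for s in sequences)
    Y = numpy.full((max_len, n_seqs), null_sym, dtype='int64')
    YM = numpy.zeros((max_len, n_seqs), dtype='float32')
    for j, seq in enumerate(sequences):
        Y[:len(seq), j] = seq
        YM[:len(seq), j] = 1.0
    return Y, YM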
def __call__(self):
    batch = self.data.next()
    assert batch

    null_inputs = sum(batch["x"].flatten() == self.null_word) / float(len(batch["x"][0]))

    # Replace occurrences of <null> with </s> (<null> should be the last word
    # in the sentence)
    for i in range(1, len(batch["x"]) - 1):
        batch["x_mask"][i + 1][batch["x"][i] == self.null_word] = 0
    batch["x"][batch["x"] == self.null_word] = 0
    # If <null> was the only word in the sentence, add it back in to prevent
    # an empty input
    batch["x"][0][batch["x"][0] == 0] = self.null_word

    if self.state["rolling_vocab"]:
        # Assumes batch is a dictionary
        batch["x"] = replace_array(batch["x"], self.model.large2small_src)
        batch["y"] = replace_array(batch["y"], self.model.large2small_trgt)

    # Perturb the data (! and the model)
    if isinstance(batch, dict):
        batch = self.model.perturb(**batch)
    else:
        batch = self.model.perturb(*batch)
    # Load the dataset into the GPU
    # Note: not the most efficient approach in general, as each batch
    # is copied to the GPU individually
    if isinstance(batch, dict):
        for gdata in self.gdata:
            gdata.set_value(batch[gdata.name], borrow=True)
    else:
        for gdata, data in zip(self.gdata, batch):
            gdata.set_value(data, borrow=True)
    # Run the training function
    g_st = time.time()
    rvals = self.train_fn()
    for schedule in self.schedules:
        schedule(self, rvals[-1])
    if null_inputs > 0.5:
        self.update_fn_lm()
    else:
        self.update_fn()
    g_ed = time.time()
    self.state["lr"] = float(self.lr)
    cost = rvals[-1]
    self.old_cost = cost
    whole_time = time.time() - self.step_timer
    if self.step % self.state["trainFreq"] == 0:
        msg = ".. iter %4d cost %.3f"
        vals = [self.step, cost]
        for dx, prop in enumerate(self.prop_names):
            msg += " " + prop + " %.2e"
            vals += [float(numpy.array(rvals[dx]))]
        msg += " step time %s whole time %s lr %.2e"
        vals += [print_time(g_ed - g_st),
                 print_time(time.time() - self.step_timer),
                 float(self.lr)]
        print msg % tuple(vals)
    self.step += 1
    ret = dict(
        [
            ("cost", float(cost)),
            ("error", float(cost)),
            ("lr", float(self.lr)),
            ("time_step", float(g_ed - g_st)),
            ("whole_time", float(whole_time)),
        ] + zip(self.prop_names, rvals)
    )
    return ret
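# NOTE: `replace_array` remaps large-vocabulary token ids onto the current
# small (rolling) vocabulary via the `large2small_*` dictionaries. A minimal
# numpy sketch of such a remapping (hypothetical; the repository's own helper
# may handle out-of-vocabulary ids differently):
import numpy

def _replace_array_sketch(ids, large2small, unk_id=1):
    """Map each id through `large2small`, falling back to `unk_id`."""
    flat = [large2small.get(int(t), unk_id) for t in ids.ravel()]
    return numpy.array(flat, dtype=ids.dtype).reshape(ids.shape)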