def test_exclude_tags():
    """A sink behind an exclude_tags filter must drop every message carrying the blocked tag."""
    received = []
    filtered = L.exclude_tags(L.list_sink(received), 'fun')
    # Two taggers feed the same filtered sink: one with an allowed tag, one with the blocked tag.
    warn_pipe = L.taggify(filtered, 'warn')
    fun_pipe = L.taggify(filtered, 'fun')
    warn_pipe.send({'message': 'take care'})
    fun_pipe.send({'message': 'haahaa'})
    # Only the 'warn'-tagged message may reach the sink.
    assert len(received) == 1, 'wrong number of messages got through'
    assert received[0]['message'] == 'take care', 'wrong message got through'
def test_tagging():
    """Stacked taggify stages prepend their tags, newest stage first."""
    received = []
    sink = L.list_sink(received)
    tagged = L.taggify(sink, tags=['bla', 'blubb'])
    tagged.send({})
    assert len(received) == 1, 'nothing added to sink'
    tags = received[0]['tags']
    assert tags == ['bla', 'blubb'], 'tags did not get through: %s' % tags
    # Wrapping again with a single tag: it must end up in front of the existing ones.
    tagged = L.taggify(tagged, tags='hopp')
    tagged.send({})
    tags = received[-1]['tags']
    assert tags == ['hopp', 'bla', 'blubb'], 'tags did not get through: %s' % tags
def pretrain(self, schedule):
    """Layer-wise pretrain, then unroll the stack into a deep autoencoder.

    After the superclass has pretrained every layer, the model is
    "unrolled": the original layers form the encoder, and a transposed
    copy of each layer (in reverse order) forms the decoder.  A single
    flat parameter vector of size ``self.psize`` backs both halves, so
    encoder/decoder weights are views into ``self.params``.

    :param schedule: pretraining schedule dict; must contain the key
        ``"pretrained"`` — truthy means the decoder weights are seeded
        with the transposed pretrained encoder weights.
    """
    super(DAE, self).pretrain(schedule=schedule)
    # Snapshot the pretrained parameters before self.params is rebuilt below.
    p = self.params.as_numpy_array()
    pretrained = schedule["pretrained"]
    # How many parameters in the unrolled model?
    # Per layer: encoder needs weights + hidden bias (shape[1]),
    # decoder needs weights + visible bias (shape[0]).
    _dec = []
    _enc = [0]
    self.psize = 0
    for layer in self:
        _enc.append(layer.shape[0] * layer.shape[1] + layer.shape[1])
        _dec.append(layer.shape[0] * layer.shape[1] + layer.shape[0])
        self.psize += _enc[-1] + _dec[-1]
    # Cumulative offsets of each encoder layer within the flat vector.
    self.enc = np.cumsum(_enc)
    # Decoder layers are laid out in reverse order, directly after the encoder.
    _dec.append(0)
    _dec.reverse()
    self.dec = np.cumsum(_dec) + self.enc[-1]
    # Build up encoder and decoder
    self.encoder = []
    self.params = gzeros(self.psize)
    for layer, (c1, c2) in izip(self, izip(self.enc[:-1], self.enc[1:])):
        self.encoder.append(layer)
        # Copy pretrained values in, then rebind the layer to a view of
        # the new flat vector so updates stay shared.
        self.params[c1:c2] = p[c1:c2]
        layer.p = self.params[c1:c2]
    self.decoder = []
    for layer, (c1, c2) in izip(self[-1::-1], izip(self.dec[:-1], self.dec[1:])):
        # transpose() yields a mirror layer backed by params[c1:c2].
        l = layer.transpose(self.params[c1:c2])
        if pretrained:
            # Seed decoder weights (up to m_end, i.e. the weight matrix,
            # excluding biases) with the transposed encoder weights.
            l.p[:l.m_end] = layer.p[:layer.m_end].reshape(
                layer.shape).T.ravel()
        self.decoder.append(l)
    # Fix missing activations of decoder: each decoder layer reproduces the
    # activation of the encoder layer one level below; the final
    # reconstruction layer is linear (identity).
    for i, layer in enumerate(self[-2::-1]):
        self.decoder[i].activ = layer.activ
    self.decoder[-1].activ = idnty
    msg = {"msg": "DAE unrolled: %s" % self}
    munk.taggify(self.logging, "pretty").send(msg)
def pretrain(self, schedule):
    """Layer-wise pretrain, then unroll the stack into a deep autoencoder.

    After the superclass has pretrained every layer, the model is
    "unrolled": the original layers form the encoder, and a transposed
    copy of each layer (in reverse order) forms the decoder.  A single
    flat parameter vector of size ``self.psize`` backs both halves, so
    encoder/decoder weights are views into ``self.params``.

    :param schedule: pretraining schedule dict; must contain the key
        ``"pretrained"`` — truthy means the decoder weights are seeded
        with the transposed pretrained encoder weights.
    """
    super(DAE, self).pretrain(schedule=schedule)
    # Snapshot the pretrained parameters before self.params is rebuilt below.
    p = self.params.as_numpy_array()
    pretrained = schedule["pretrained"]
    # How many parameters in the unrolled model?
    # Per layer: encoder needs weights + hidden bias (shape[1]),
    # decoder needs weights + visible bias (shape[0]).
    _dec = []
    _enc = [0]
    self.psize = 0
    for layer in self:
        _enc.append(layer.shape[0] * layer.shape[1] + layer.shape[1])
        _dec.append(layer.shape[0] * layer.shape[1] + layer.shape[0])
        self.psize += _enc[-1] + _dec[-1]
    # Cumulative offsets of each encoder layer within the flat vector.
    self.enc = np.cumsum(_enc)
    # Decoder layers are laid out in reverse order, directly after the encoder.
    _dec.append(0)
    _dec.reverse()
    self.dec = np.cumsum(_dec) + self.enc[-1]
    # Build up encoder and decoder
    self.encoder = []
    self.params = gzeros(self.psize)
    for layer, (c1, c2) in izip(self, izip(self.enc[:-1], self.enc[1:])):
        self.encoder.append(layer)
        # Copy pretrained values in, then rebind the layer to a view of
        # the new flat vector so updates stay shared.
        self.params[c1:c2] = p[c1:c2]
        layer.p = self.params[c1:c2]
    self.decoder = []
    for layer, (c1, c2) in izip(self[-1::-1], izip(self.dec[:-1], self.dec[1:])):
        # transpose() yields a mirror layer backed by params[c1:c2].
        l = layer.transpose(self.params[c1:c2])
        if pretrained:
            # Seed decoder weights (up to m_end, i.e. the weight matrix,
            # excluding biases) with the transposed encoder weights.
            l.p[: l.m_end] = layer.p[: layer.m_end].reshape(layer.shape).T.ravel()
        self.decoder.append(l)
    # Fix missing activations of decoder: each decoder layer reproduces the
    # activation of the encoder layer one level below; the final
    # reconstruction layer is linear (identity).
    for i, layer in enumerate(self[-2::-1]):
        self.decoder[i].activ = layer.activ
    self.decoder[-1].activ = idnty
    msg = {"msg": "DAE unrolled: %s" % self}
    munk.taggify(self.logging, "pretty").send(msg)
def train(self, schedule):
    """Finetune the whole stack according to ``schedule``.

    Wires the stack's score/gradient into the optimizer built by
    ``prepare_opt``, iterates it for ``schedule["opt"]["epochs"]``
    steps, logging evaluation results every ``stop`` steps and
    optionally dumping "peek" predictions to ``.npz`` files.  Finally
    the full parameter vector and the model shape are logged.

    :param schedule: dict with "train", optional "valid", "eval" and
        "opt" entries.  NOTE(review): "opt" is mutated in place
        ("f", "fprime", "eval_score" keys are overwritten).
    """
    train = [schedule["train"][0], schedule["train"][1]]
    valid = None if not schedule.get("valid") else [schedule["valid"][0], schedule["valid"][1]]
    # A validation set must be supplied iff the eval section expects one.
    assert (valid is not None) == ("valid" in schedule["eval"]), "Confusion about validation set!"
    opt_schedule = schedule["opt"]
    pp = {"type" : str(self)}
    munk.taggify(self.logging, "pretty").send(pp)
    log = munk.add_keyvalue(self.logging, "layer", "Stack")
    epochs = opt_schedule["epochs"]
    if epochs > 0:
        # Hand objective and gradient of the full stack to the optimizer.
        opt_schedule["f"] = self.score
        opt_schedule["fprime"] = self.grad
        if "eval_score" in opt_schedule:
            # Keep the caller-supplied scorer and route evaluation
            # through self.evaluate_score instead.
            self._eval_score = opt_schedule["eval_score"]
            opt_schedule["eval_score"] = self.evaluate_score
        opt, evals, peeks = prepare_opt(opt_schedule, self.params, schedule, train, valid)
        stop = opt_schedule["stop"]
        if "peeks" in opt_schedule:
            peek_iv = opt_schedule["peek_intervall"]
            peek_files = {}
            for p in opt_schedule["peeks"]:
                peek_files[p] = p + ".peek"
        else:
            # No peeks requested: choose an interval that never fires
            # before the loop breaks at `epochs`.
            peek_iv = epochs + 1
        for i, info in enumerate(opt):
            # Every `stop` iterations: run evaluations and log them.
            if (i+1) % stop == 0:
                for e in evals:
                    info[e] = evals[e](self.params)
                # Convert gnumpy arrays to loggable data before sending.
                info = replace_gnumpy_data(info)
                log.send(info)
            if i+1 == epochs:
                break
            # Every `peek_iv` iterations: dump predictions to disk.
            if (i+1) % peek_iv == 0:
                for p in peeks:
                    prediction, inputs = peeks[p](self.params)
                    np.savez(peek_files[p], prediction, inputs)
                    pp = {"msg": "Writing peek file %s"%peek_files[p]}
                    munk.taggify(self.logging, "pretty").send(pp)
    else:
        pp = {"msg": "NO FINETUNING of stack"}
        munk.taggify(self.logging, "pretty").send(pp)
    # Always log the final parameters and model shape, even without finetuning.
    _params = self.params.as_numpy_array().tolist()
    info = dict(params=_params, shape=self.__repr__())
    log.send(info)
def pretrain(self, schedule):
    """Greedy layer-wise pretraining of the stack.

    For each (layer, per-layer schedule) pair: initialize pretraining
    parameters, optionally run the layer's optimizer for
    ``sched["opt"]["epochs"]`` steps (logging evaluations every
    ``stop`` steps), finalize the layer, and then propagate the
    training (and validation) data through the freshly trained layer
    into a temporary HDF5 file that feeds the next layer.

    :param schedule: dict with "train", optional "valid" and "eval"
        entries; per-layer options come from ``self.stack``.
    Side effects: writes one ``*_TMP.h5`` file per non-final layer and
    rebinds ``train[0]``/``valid[0]`` to datasets inside it.
    """
    train = [schedule["train"][0], schedule["train"][1]]
    valid = None if not schedule.get("valid") else [schedule["valid"][0], schedule["valid"][1]]
    # A validation set must be supplied iff the eval section expects one.
    assert (valid is not None) == ("valid" in schedule["eval"]), "Confusion about validation set!"
    for i, (layer, sched) in enumerate(izip(self, self.stack)):
        pt_params = layer.pt_init(**sched)
        opt_schedule = sched["opt"]
        pp = {"layer":i, "type":str(layer)}
        munk.taggify(self.logging, "pretty").send(pp)
        log = munk.add_keyvalue(self.logging, "layer", i)
        epochs = opt_schedule["epochs"]
        if epochs > 0:
            # Hand this layer's pretraining objective/gradient to the optimizer.
            opt_schedule["f"] = layer.pt_score
            opt_schedule["fprime"] = layer.pt_grad
            opt, evals, peeks = prepare_opt(opt_schedule, pt_params, schedule, train, valid)
            stop = opt_schedule["stop"]
            for j, info in enumerate(opt):
                # Every `stop` iterations: evaluate and log.
                if (j+1) % stop == 0:
                    for e in evals:
                        info[e] = evals[e](pt_params)
                    # Convert gnumpy arrays to loggable data before sending.
                    info = replace_gnumpy_data(info)
                    log.send(info)
                if (j+1) == epochs:
                    break
        else:
            pp = {"msg": "NO PRETRAINING of layer %i"%i}
            munk.taggify(self.logging, "pretty").send(pp)
        # Finalize layer; pt_params are consumed here and released.
        info = layer.pt_done(pt_params, **sched)
        pt_params = None
        log.send(info)
        # move data forward, save in temporary hdf5
        if i < (len(self) - 1):
            # NOTE(review): temp file is never explicitly closed/deleted here;
            # the log message below tells the user to take care of it.
            nxt_name = strftime("%Y-%m-%d-%H:%M:%S") + "_L" + str(i+1) + "_TMP.h5"
            nxt = h5py.File(nxt_name)
            pp = {"msg": "Take care of temporary " + nxt_name}
            munk.taggify(self.logging, "pretty").send(pp)
            # if a validation set is available, move it forward, too.
            if valid:
                valid[0] = self.next_hdf5(layer, valid[0], "validation", nxt, chunk=512)
            train[0] = self.next_hdf5(layer, train[0], "train", nxt, chunk=512)