def test_reversible_encoding():
    """Encoding a text and then decoding it recovers the original text"""
    text = "For the glory for mankind"
    encoder = Encoder([text])
    coded = encoder.encodetext(text)
    decoded = encoder.decodeindexes(coded)
    print("Original text: %s" % text)
    print("Encoded text: " + str(coded))
    print("Decoded text: %s" % decoded)
    assert text == decoded
def test_consistent_encoding():
    """Two encoders built from different corpora that share the same tokens are equal"""
    corpus1 = [
        "For the glory of mankind",
        "God's in his heaven. All's right with the world"
    ]
    corpus2 = [
        "For the glory of God",
        "mankind's in his world. All's right with the heaven"
    ]
    encoder1 = Encoder(corpus1)
    encoder2 = Encoder(corpus2)
    assert encoder1 == encoder2
def test_hypertrain_loadcheckpoints():
    """A previously generated checkpoints file can be used to continue the hyperoptimization"""
    modelclass = SmallWavenet
    corpus = Corpus([
        "This is a very small corpus for testing the hypertrain procedure.",
        "Hope it works!!!"
    ])
    encoder = Encoder(corpus, CharTokenizer)
    checkpointsfile = DATAFOLDER + "checkpoints"
    with NamedTemporaryFile("r") as tempfile:
        tempdir = mkdtemp()
        tmpname = tempfile.name
        # Start from a copy of the pregenerated checkpoints file, so the
        # hyperoptimization resumes instead of starting from scratch
        copyfile(checkpointsfile, tmpname)
        model = hypertrain(modelclass, encoder, corpus, tempdir, n_calls=15,
                           verbose=2, valmask=[False, True], patience=1,
                           maxepochs=10, checkpointfile=tmpname)
        rmtree(tempdir)
    assert model is not None
def test_writer_beamsearch():
    """Beam search over a mock model returns the expected sequence of tokens"""
    mockmodel = MockModel()
    corpus = Corpus(["abc"])
    encoder = Encoder(corpus=corpus, tokenizer=CharTokenizer())
    writer = Writer(mockmodel, encoder, creativity=0, beamsize=3, batchsize=3)
    seed = np.array([0, 0])
    expected = [0, 0, 0]
    obtained = writer.beamsearch(seed)
    print("Expected", expected)
    print("Obtained", obtained)
    assert obtained == expected
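# For reference, a minimal sketch of what the MockModel helper above might
# look like. The real helper in the project's test suite is not shown here;
# the vocabulary size (3) and the predict() signature are assumptions.
class MockModelSketch:
    """Hypothetical stand-in for a trained network: token 0 is always the
    most probable prediction, which is what makes [0, 0, 0] the expected
    beam search output for the test above."""

    def predict(self, batch, **kwargs):
        # One probability row per input sequence, peaked on token 0 but
        # with nonzero mass elsewhere so log-probabilities stay finite
        probs = np.full((len(batch), 3), 0.1)
        probs[:, 0] = 0.8
        return probs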
def train(corpus, corpusformat, encoderfile, modelfile, architecture,
          tokenizer, trials, tmpmodels, checkpoint, maxepochs):
    """Trains a Neurowriter model"""
    # Load corpus
    corpus = FORMATTERSBYNAME[corpusformat](corpus)
    print("Training with corpus:", corpus[0][0:1000])
    # Encoding
    encoder = Encoder(corpus,
                      tokenizerbyname(tokenizer) if tokenizer is not None else None)
    encoder.save(encoderfile)
    print("Computed encoder:", encoder.char2index)
    # Prepare temporary files
    if tmpmodels is None:
        tmpdir = tempfile.TemporaryDirectory()
        tmpmodels = tmpdir.name
    if checkpoint is None:
        tmpfile = tempfile.NamedTemporaryFile()
        checkpoint = tmpfile.name
    # Model training
    modelclass = modelbyname(architecture)
    model = hypertrain(modelclass, encoder, corpus, tmpmodels, n_calls=trials,
                       verbose=2, valmask=[False] * 3 + [True],
                       checkpointfile=checkpoint, maxepochs=maxepochs)
    model.save(modelfile)
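# A sketch of how the train entry point might be invoked. The format name,
# architecture name and file paths below are illustrative assumptions: the
# real values must be a key of FORMATTERSBYNAME and a name accepted by
# modelbyname, respectively.
if __name__ == "__main__":
    train(
        corpus="corpus.txt",       # input accepted by the chosen formatter
        corpusformat="singletxt",  # assumed key of FORMATTERSBYNAME
        encoderfile="model.enc",   # where the fitted Encoder is saved
        modelfile="model.h5",      # where the best trained model is saved
        architecture="lstm",       # assumed name known to modelbyname
        tokenizer=None,            # None: use the Encoder's default tokenizer
        trials=10,                 # number of hyperoptimization calls
        tmpmodels=None,            # None: use a throwaway temporary directory
        checkpoint=None,           # None: use a throwaway checkpoint file
        maxepochs=100              # cap on training epochs per trial
    )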
def test_hypertrain_run():
    """A small hypertraining procedure can be run"""
    modelclass = PerceptronModel
    corpus = Corpus([
        "This is a very small corpus for testing the hypertrain procedure.",
        "Hope it works!!!"
    ])
    encoder = Encoder(corpus, CharTokenizer)
    with NamedTemporaryFile("r") as tempfile:
        tempdir = mkdtemp()
        tmpname = tempfile.name
        model = hypertrain(modelclass, encoder, corpus, tempdir, n_calls=15,
                           verbose=2, valmask=[False, True], patience=1,
                           maxepochs=10, checkpointfile=tmpname)
        rmtree(tempdir)
    assert model is not None