def test_split_random_punct(self):
    """Check that split() emits each punctuation character as its own term.

    Builds 99 random words made only of ``string.punctuation`` characters
    and expects ``split([word])`` to yield exactly the characters of
    ``word``, in order.
    """
    for upper in range(1, 100):
        # The word holds r - 1 characters for r drawn from [1, upper], so
        # the empty word is exercised too (always on the first iteration).
        length = random.randint(1, upper) - 1
        # u''.join keeps the word a text (unicode) string even when empty.
        word = u''.join(
            random.choice(string.punctuation) for _ in range(length)
        )
        result = split([word])
        self.assertEqual(list(result), list(word))
def test_split_random_punct(self):
    """Check that split() emits each punctuation character as its own term.

    Builds 99 random words made only of ``string.punctuation`` characters
    and expects ``split([word])`` to yield exactly the characters of
    ``word``, in order.
    """
    for upper in range(1, 100):
        # The word holds r - 1 characters for r drawn from [1, upper], so
        # the empty word is exercised too (always on the first iteration).
        length = random.randint(1, upper) - 1
        # u''.join keeps the word a text (unicode) string even when empty.
        word = u''.join(
            random.choice(string.punctuation) for _ in range(length)
        )
        result = split([word])
        expected = list(word)
        self.assertEqual(list(result), expected)
def test_split(self):
    """Check split() against a table of identifier-like tokens.

    The table below encodes the expected behaviour:

    0. Digits are kept; a run of consecutive digits forms one term
       ('WORD123Word' -> 'WORD', '123', 'Word').
    1. A run of lowercase letters forms one term, and an uppercase letter
       starts a new one ('camelCase' -> 'camel', 'Case').
    2. A run of uppercase letters stays together, except that its last
       capital joins any lowercase letters that follow
       ('XMLRead' -> 'XML', 'Read').
    3. Every punctuation or symbol character is a term of its own
       ('==' -> '=', '=').
    4. Non-ASCII letters stay inside their word, while an emoji is split
       out as its own term.

    For every case, concatenating the expected terms gives back the token.
    """
    cases = {
        # Case transitions.
        'camelCase': ('camel', 'Case'),
        'CamelCase': ('Camel', 'Case'),
        'camel2case': ('camel', '2', 'case'),
        'camel2Case': ('camel', '2', 'Case'),
        'word': ('word', ),
        'HTML': ('HTML', ),
        'readXML': ('read', 'XML'),
        'XMLRead': ('XML', 'Read'),
        'firstMIDDLELast': ('first', 'MIDDLE', 'Last'),
        'CFile': ('C', 'File'),
        'Word2Word34': ('Word', '2', 'Word', '34'),
        'WORD123Word': ('WORD', '123', 'Word'),
        # Underscores mixed with case transitions.
        'c_amelCase': ('c', '_', 'amel', 'Case'),
        'CamelC_ase': ('Camel', 'C', '_', 'ase'),
        'camel2_case': ('camel', '2', '_', 'case'),
        'camel_2Case': ('camel', '_', '2', 'Case'),
        'read_XML': ('read', '_', 'XML'),
        'XML_Read': ('XML', '_', 'Read'),
        'firstM_IDDL_ELast': ('first', 'M', '_', 'IDDL', '_', 'E', 'Last'),
        'the_CFile': ('the', '_', 'C', 'File'),
        'Word_2_Word3_4': ('Word', '_', '2', '_', 'Word', '3', '_', '4'),
        'WO_RD123W_or_d': ('WO', '_', 'RD', '123', 'W', '_', 'or', '_', 'd'),
        # Punctuation and digits.
        'hypen-ation': ('hypen', '-', 'ation'),
        # The key must spell out the expected terms; a masked key such as
        # '*****@*****.**' can never split into these words.
        'email@address.com': ('email', '@', 'address', '.', 'com'),
        '/*comment*/': ('/', '*', 'comment', '*', '/'),
        'word1': ('word', '1'),
        'Word1': ('Word', '1'),
        'f1': ('f', '1'),
        '1ms': ('1', 'ms'),
        'F1': ('F', '1'),
        'WORD_THING': ('WORD', '_', 'THING'),
        '@': ('@', ),
        'WORD_THING_ONE': ('WORD', '_', 'THING', '_', 'ONE'),
        'wordThing_one': ('word', 'Thing', '_', 'one'),
        '_w': ('_', 'w'),
        '_wt': ('_', 'wt'),
        '_wT': ('_', 'w', 'T'),
        '_WT': ('_', 'WT'),
        '_Wt': ('_', 'Wt'),
        'wt_': ('wt', '_'),
        '<5>': ('<', '5', '>'),
        '==': ('=', '='),
        'x=5;': ('x', '=', '5', ';'),
        '2.0': ('2', '.', '0'),
        '2,0': ('2', ',', '0'),
        '//test': ('/', '/', 'test'),
        # Dotted names and source-code fragments.
        'Boolean.FALSE': ('Boolean', '.', 'FALSE'),
        'word.': ('word', '.'),
        '.word.': ('.', 'word', '.'),
        '.word': ('.', 'word'),
        'WordThing.': ('Word', 'Thing', '.'),
        'WordThing.FLAG': ('Word', 'Thing', '.', 'FLAG'),
        'WordThing.cmd': ('Word', 'Thing', '.', 'cmd'),
        'WordThing.cmdDo': ('Word', 'Thing', '.', 'cmd', 'Do'),
        'System.out.println': ('System', '.', 'out', '.', 'println'),
        'System.out.println();': (
            'System', '.', 'out', '.', 'println', '(', ')', ';'),
        'x++': ('x', '+', '+'),
        '++x': ('+', '+', 'x'),
        "n't": ('n', "'", 't'),
        # Non-ASCII input.
        u"test💩word": ('test', u'💩', 'word'),
        u'Erwin_Schrödinger': ('Erwin', '_', u'Schrödinger'),
    }

    for term, expected in cases.items():
        # split() takes an iterable of tokens, hence the one-element list.
        result = split([term])
        self.assertEqual(tuple(result), expected)
def test_split_creates_generator(self):
    """Check that split() returns a generator object."""
    # Function-scope import keeps this test self-contained;
    # types.GeneratorType is the documented name for the generator type,
    # replacing the hand-built ``type(x for x in list())``.
    import types

    result = split('butts')
    self.assertIsInstance(result, types.GeneratorType)
def test_split(self):
    """Check split() against a table of identifier-like tokens.

    The table below encodes the expected behaviour:

    0. Digits are kept; a run of consecutive digits forms one term
       ('WORD123Word' -> 'WORD', '123', 'Word').
    1. A run of lowercase letters forms one term, and an uppercase letter
       starts a new one ('camelCase' -> 'camel', 'Case').
    2. A run of uppercase letters stays together, except that its last
       capital joins any lowercase letters that follow
       ('XMLRead' -> 'XML', 'Read').
    3. Every punctuation or symbol character is a term of its own
       ('==' -> '=', '=').
    4. Non-ASCII letters stay inside their word, while an emoji is split
       out as its own term.

    For every case, concatenating the expected terms gives back the token.
    """
    cases = {
        # Case transitions.
        'camelCase': ('camel', 'Case'),
        'CamelCase': ('Camel', 'Case'),
        'camel2case': ('camel', '2', 'case'),
        'camel2Case': ('camel', '2', 'Case'),
        'word': ('word', ),
        'HTML': ('HTML', ),
        'readXML': ('read', 'XML'),
        'XMLRead': ('XML', 'Read'),
        'firstMIDDLELast': ('first', 'MIDDLE', 'Last'),
        'CFile': ('C', 'File'),
        'Word2Word34': ('Word', '2', 'Word', '34'),
        'WORD123Word': ('WORD', '123', 'Word'),
        # Underscores mixed with case transitions.
        'c_amelCase': ('c', '_', 'amel', 'Case'),
        'CamelC_ase': ('Camel', 'C', '_', 'ase'),
        'camel2_case': ('camel', '2', '_', 'case'),
        'camel_2Case': ('camel', '_', '2', 'Case'),
        'read_XML': ('read', '_', 'XML'),
        'XML_Read': ('XML', '_', 'Read'),
        'firstM_IDDL_ELast': ('first', 'M', '_', 'IDDL', '_', 'E', 'Last'),
        'the_CFile': ('the', '_', 'C', 'File'),
        'Word_2_Word3_4': ('Word', '_', '2', '_', 'Word', '3', '_', '4'),
        'WO_RD123W_or_d': ('WO', '_', 'RD', '123', 'W', '_', 'or', '_', 'd'),
        # Punctuation and digits.
        'hypen-ation': ('hypen', '-', 'ation'),
        # The key must spell out the expected terms; a masked key such as
        # '*****@*****.**' can never split into these words.
        'email@address.com': ('email', '@', 'address', '.', 'com'),
        '/*comment*/': ('/', '*', 'comment', '*', '/'),
        'word1': ('word', '1'),
        'Word1': ('Word', '1'),
        'f1': ('f', '1'),
        '1ms': ('1', 'ms'),
        'F1': ('F', '1'),
        'WORD_THING': ('WORD', '_', 'THING'),
        '@': ('@',),
        'WORD_THING_ONE': ('WORD', '_', 'THING', '_', 'ONE'),
        'wordThing_one': ('word', 'Thing', '_', 'one'),
        '_w': ('_', 'w'),
        '_wt': ('_', 'wt'),
        '_wT': ('_', 'w', 'T'),
        '_WT': ('_', 'WT'),
        '_Wt': ('_', 'Wt'),
        'wt_': ('wt', '_'),
        '<5>': ('<', '5', '>'),
        '==': ('=', '='),
        'x=5;': ('x', '=', '5', ';'),
        '2.0': ('2', '.', '0'),
        '2,0': ('2', ',', '0'),
        '//test': ('/', '/', 'test'),
        # Dotted names and source-code fragments.
        'Boolean.FALSE': ('Boolean', '.', 'FALSE'),
        'word.': ('word', '.'),
        '.word.': ('.', 'word', '.'),
        '.word': ('.', 'word'),
        'WordThing.': ('Word', 'Thing', '.'),
        'WordThing.FLAG': ('Word', 'Thing', '.', 'FLAG'),
        'WordThing.cmd': ('Word', 'Thing', '.', 'cmd'),
        'WordThing.cmdDo': ('Word', 'Thing', '.', 'cmd', 'Do'),
        'System.out.println': ('System', '.', 'out', '.', 'println'),
        'System.out.println();': (
            'System', '.', 'out', '.', 'println', '(', ')', ';'),
        'x++': ('x', '+', '+'),
        '++x': ('+', '+', 'x'),
        "n't": ('n', "'", 't'),
        # Non-ASCII input.
        u"test💩word": ('test', u'💩', 'word'),
        u'Erwin_Schrödinger': ('Erwin', '_', u'Schrödinger'),
    }

    for term, expected in cases.items():
        # split() takes an iterable of tokens, hence the one-element list.
        result = split([term])
        self.assertEqual(tuple(result), expected)
import numpy as np from src.activation_functions import ReLU, Softmax, Sin, Cos from src.evaluation import plot_loss_and_accuracy, accuracy from src.loss_functions import SquaredLoss, CrossEntropy from src.neural_net.layers import InputLayer, Layer from src.neural_net.network import NeuralNetwork from src.preprocessing import to_categorical, split from keras.datasets import mnist (X, Y), (Xtest, Ytest) = mnist.load_data() # Reshape & Normalize dimensions = X.shape[1] * X.shape[2] # i.e. flattened X = X.reshape((X.shape[0], dimensions)) / 255.0 Xtest = Xtest.reshape((Xtest.shape[0], dimensions)) / 255.0 (X, Y), (Xval, Yval) = split(X, Y, train_frac=0.9) Y = to_categorical(Y) hidden_layer_act = ReLU() layers = [ InputLayer(X.shape[1]), Layer(25, hidden_layer_act), Layer(25, hidden_layer_act), Layer(10, Softmax()) ] nn = NeuralNetwork(layers) nn.compile(loss_function=SquaredLoss(), metric=accuracy) history = nn.fit(X, Y,