    def test_10(self):
        '''Creates a fake data-set with points labeled 'yes' around the origin and points labeled 'no' outside'''
        arrs = []
        labels = []
        # Points about the origin (inside a box of side length 16 centered at the origin);
        # np.sign(random.random() - 0.5) randomly makes each coordinate positive or negative
        for i in range(0, 10):
            arr = [
                random.randint(0, 8) * np.sign(random.random() - 0.5)
                for x in range(0, 2)
            ]
            label = 'yes'
            arrs.append(arr)
            labels.append(label)
        # Points outside the box
        for i in range(0, 10):
            arr = [
                random.randint(10, 20) * np.sign(random.random() - 0.5)
                for x in range(0, 2)
            ]
            label = 'no'
            arrs.append(arr)
            labels.append(label)
        # Add some noise
        for i in range(0, 2):
            arr = [
                random.randint(0, 8) * np.sign(random.random() - 0.5)
                for x in range(0, 2)
            ]
            label = 'no'  # Note: this is artificially misclassified
            arrs.append(arr)
            labels.append(label)
        for i in range(0, 10):
            arr = [
                random.randint(10, 20) * np.sign(random.random() - 0.5)
                for x in range(0, 2)
            ]
            label = 'yes'  # Note: this is artificially misclassified
            arrs.append(arr)
            labels.append(label)

        ann = Ann(arrs, labels, n_h=2)
        (models, test_accuracies, test_costs) = ann.train()

        best_test_accuracy = 0
        best_i = -1
        for i in range(0, len(test_accuracies)):
            if (test_accuracies[i] > best_test_accuracy):
                best_test_accuracy = test_accuracies[i]
                best_i = i

        if (best_i > -1):
            # Persist the best-performing model
            model_name = models[best_i].name
            directory = '../Ann-models'
            path_to_file = directory + '/' + model_name
            if not os.path.exists(directory):
                os.makedirs(directory)
            with open(path_to_file, 'wb') as f:
                pickle.dump(models[best_i], f)
        else:
            logger.error('Error!')
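
    # A minimal sketch (not part of the original tests), assuming the Ann(model) constructor and
    # h_by_class() behave as shown in test_9 below; the probe points and expected labels are
    # illustrative only.
    def non_test_reload_model(self, path_to_file):
        '''Reload a model pickled by test_10 above and classify a couple of points with it.'''
        with open(path_to_file, 'rb') as f:
            unpickled_model = pickle.load(f)
        ann_from_model = Ann(unpickled_model)  # Initialize a network from a saved model only
        print(ann_from_model.h_by_class([1, 1]))     # a point inside the box, expected 'yes'
        print(ann_from_model.h_by_class([15, -15]))  # a point outside the box, expected 'no'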
    def non_test_6(self):
        # Test that training lowers the cost on random small and medium-sized data-sets

        # Small size random data-set with two labels
        arrs = []
        labels = []
        classes = ('cat', 'dog')
        for i in range(0, 1):
            print('\nTesting data-set ' + str(i))
            for m in range(0, 10):
                arr = [random.random() for x in range(0, 3)]
                label = classes[random.random() > 0.5]
                arrs.append(arr)
                labels.append(label)
            ann = Ann(
                arrs,
                labels)  # Create Ann with these train_examples and labels
            cost_before = ann.cost()
            ann.train()
            cost_after = ann.cost()
            self.assertTrue(cost_after <= cost_before)

        # Medium size random data-set with three labels
        arrs = []
        labels = []
        classes = ('cat', 'dog', 'bird')
        for i in range(0, 1):
            print('\nTesting data-set ' + str(i))
            for m in range(0, 10):
                arr = [random.random() for x in range(0, 5)]
                z = random.random()
                if (z < 0.33):
                    label = classes[0]
                elif (z >= 0.33 and z < 0.66):
                    label = classes[1]
                else:
                    label = classes[2]
                arrs.append(arr)
                labels.append(label)
            ann = Ann(
                arrs,
                labels)  # Create Ann with these train_examples and labels
            cost_before = ann.cost()
            ann.train()
            cost_after = ann.cost()
            self.assertTrue(cost_after <= cost_before)
    def test_10(self):
        """Creates a fake data-set with points labeled 'yes' around origin and points labeled 'no' outside"""
        arrs = []
        labels = []
        """Points about the origin (located in a box of length 16 centered at origin)"""
        for i in range(0, 100):
            arr = [random.randint(0, 8) * np.sign(random.random() - 0.5) for x in range(0, 2)]
            label = "yes"
            arrs.append(arr)
            labels.append(label)
        """Points outside the box"""
        for i in range(0, 100):
            arr = [random.randint(10, 20) * np.sign(random.random() - 0.5) for x in range(0, 2)]
            label = "no"
            arrs.append(arr)
            labels.append(label)
        """Add some noise"""
        for i in range(0, 10):
            arr = [random.randint(0, 8) * np.sign(random.random() - 0.5) for x in range(0, 2)]
            label = "no"  # Note: this is artificially misclassified
            arrs.append(arr)
            labels.append(label)
        for i in range(0, 10):
            arr = [random.randint(10, 20) * np.sign(random.random() - 0.5) for x in range(0, 2)]
            label = "yes"  # Note: this is artificially misclassified
            arrs.append(arr)
            labels.append(label)

        ann = Ann(arrs, labels, n_h=2)
        (models, test_accuracies, test_costs) = ann.train()

        best_test_accuracy = 0
        best_i = -1
        for i in range(0, len(test_accuracies)):
            if test_accuracies[i] > best_test_accuracy:
                best_test_accuracy = test_accuracies[i]
                best_i = i

        if best_i > -1:
            # Persist the best-performing model
            model_name = models[best_i].name
            directory = "../Ann-models"
            path_to_file = directory + "/" + model_name
            if not os.path.exists(directory):
                os.makedirs(directory)
            with open(path_to_file, "wb") as f:
                pickle.dump(models[best_i], f)
        else:
            print("Error!")
def main():
    # An array of all text files
    dir = '../library/books/'

    # Using pickle so I don't keep re-reading these books
    print('\n\nReading books..')
    books = []
    if (os.path.exists(dir + '../my_books')):
        books = pickle.load(open(dir + '../my_books', 'rb'))
    else:
        # Just use the first 10 books
        file_names = [name for name in os.listdir(dir)][0:10]
        for file_name in file_names:
            m = re.search(r'(.*?)_(.*?)\.txt', file_name)
            # Get the author from the text file name
            author = re.sub(r'([A-Z])', r' \1', m.group(1)).strip()
            # Get the title from the text file name
            title = m.group(2).strip()
            # print(author + ' ' + title)
            with codecs.open('../library/books/' + file_name,
                             'r',
                             encoding='utf-8',
                             errors='ignore') as f:
                lines = f.readlines()
            book = Book(author, title, lines)
            books.append(book)

        pickle.dump(books, open(dir + '../my_books', 'wb'))
    for book in books:
        print(book.title + ' by ' + book.author + '\t\t has ' +
              str(len(book.sentences)) + ' sentences.')

    n = 2  # The size of our n-grams (we choose to use bi-grams)

    print('\n\nMaking a vocabulary of n-grams...')
    # Using pickle so I don't keep re-making a vocabulary
    n_gram_vocab = []
    if (os.path.exists(dir + '../my_n_grams')):
        n_gram_vocab = pickle.load(open(dir + '../my_n_grams', 'rb'))
    else:
        n_gram_vocab = {}  # Treated as a set (faster 'in' operation than list)
        for book in books:
            # print(book.author + ' ' + book.title)
            # print(len(n_gram_vocab))
            n_gram_vocab = add_to_n_gram_vocab(n_gram_vocab,
                                               book.sentences,
                                               n=n)

        # n_gram_vocab = OrderedDict(n_gram_vocab)  # Convert to an ordered list
        n_gram_vocab = list(n_gram_vocab.keys())  # Convert to an ordered list
        pickle.dump(n_gram_vocab, open(dir + '../my_n_grams', 'wb'))

    print('There are ' + str(len(n_gram_vocab)) + ' n-grams of size ' + str(n))

    print('\n\nBuilding a labeled data-set...')
    # We will do our training and testing on samples where a sample is a 5 sentence continuous text
    # Chunks are further broken down into a train and test sets by Ann
    # We look for the book with the smallest number of sentences and then get 50% of all of its 5-sentence chunks
    # For every other book, we randomly sample the same number of chunks (all labels have the same number of data points)
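    # (Worked example, assuming the smallest book has 1,000 sentences: num_chunks = 1000 - 5 + 1 = 996
    #  possible chunk start positions, so num_samples = floor(996 * 0.5) = 498 chunks per book.)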

    arrs = []  # Holds vectorial representation of our 5-sentence chunks
    labels = []  # Holds the corresponding labels (author + title) of our chunks

    chunk_length = 5
    percentage = 0.5

    # Get minimum number of sentences across all our books
    min_num_sentences = -1
    for book in books:
        if (len(book.sentences) < min_num_sentences
                or min_num_sentences == -1):
            min_num_sentences = len(book.sentences)

    for book in books:
        # We can't start a chunk at the last 4 sentences
        num_chunks = min_num_sentences - chunk_length + 1
        this_num_sentences = len(book.sentences) - chunk_length + 1
        num_samples = int(math.floor(num_chunks * percentage))
        # Randomly pick 50% of all 5-sentence chunks
        samples = random.sample(range(0, this_num_sentences), num_samples)

        label = book.title + ' by ' + book.author
        print(label)
        # Convert our sampled 5-sentence chunks into vectors
        for sample in samples:
            # print(sample)
            # Take the 5-sentence chunk starting at this sample index
            chunk = book.sentences[sample:sample + chunk_length]
            chunk = ''.join(elem + ' ' for elem in chunk)
            v = sen_2_vec(chunk, n_gram_vocab, n=n)
            arrs.append(v)
            labels.append(label)

    print('\n\nTraining logistic regression classifier using Ann...')
    ann = Ann(arrs, labels, n_h=0)  # n_h=0 means we are using 0 hidden layers
    ann.train(lam=100)

    print('\n\nFinding the top 5 most distinguishing bi-grams...')
    for k in range(0, len(books)):  # Number of classes
        v = ann.Thetas[0][k, :].tolist()[0]
        s = sorted((e, i) for i, e in enumerate(v))
        s.reverse()
        print(books[k].title + ' by ' + books[k].author)
        for i in range(0, 5):
            print(n_gram_vocab[s[i][1]])
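
# --- Illustrative sketch (not part of the original file) ---
# sen_2_vec() and add_to_n_gram_vocab() are called above but their definitions are not shown.
# A minimal sketch of what sen_2_vec() might look like, assuming word n-grams stored as tuples
# and a binary presence vector over n_gram_vocab (the real implementation may differ):
def sen_2_vec_sketch(text, n_gram_vocab, n=2):
    words = text.split()
    # Collect every n-gram (as a tuple of words) occurring in the text
    grams = set(tuple(words[i:i + n]) for i in range(len(words) - n + 1))
    # One entry per vocabulary n-gram: 1 if it occurs in the text, 0 otherwise
    return [1 if gram in grams else 0 for gram in n_gram_vocab]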
    def test_9(self):
        # function 1 (XOR function) on 1 hidden layers
        arrs = []
        arrs.append([0, 0])
        arrs.append([0, 1])
        arrs.append([1, 0])
        arrs.append([1, 1])
        labels = []
        labels.append('false')
        labels.append('true')
        labels.append('true')
        labels.append('false') 
        ann = Ann(arrs, labels, n_h=1)
        # Train and save model
        model = ann.train()[0][0]  # Take the first model from the list of models in the tuple
        ann.validate_train()
        # Check to see if train_accuracy is over 90%
        self.assertTrue(ann.train_accuracy() > 0.9)
         
        # Load the trained model into a new neural network
        ann_from_model = Ann(model)
        # Evaluate some vectors using this neural network initialized only with a model
        self.assertEqual(ann_from_model.h_by_class(arrs[0]), 'false')
        self.assertEqual(ann_from_model.h_by_class(arrs[1]), 'true')
        x = [1.1, 0.9]
        self.assertEqual(ann_from_model.h_by_class(x), 'false')
 
        # function 2 on 2 hidden layers
        arrs2 = []
        arrs2.append([1, 1])
        arrs2.append([2, 2])
        arrs2.append([1, 3])
        arrs2.append([2, 10])
        arrs2.append([1, -1])
        arrs2.append([-2, -2])
        arrs2.append([1, -3])
        arrs2.append([-2, -10])
        labels2 = []
        labels2.append('false')
        labels2.append('false')
        labels2.append('false')
        labels2.append('false')
        labels2.append('true')
        labels2.append('true')
        labels2.append('true')
        labels2.append('true') 
        ann = Ann(arrs2, labels2, n_h=2)
        model2 = ann.train()[0][0]
        ann.validate_train()
         
        # Load the second model
        ann_from_model = Ann(model2)
        # Evaluate some vectors using this neural network initialized only with a model
        self.assertEqual(ann_from_model.h_by_class(arrs2[0]), 'false')
        self.assertEqual(ann_from_model.h_by_class(arrs2[len(arrs2) - 1]), 'true')
        x = [1, -5]
        self.assertEqual(ann_from_model.h_by_class(x), 'true')
         
        # Load the first model again
        ann_from_model = Ann(model)
        # Evaluate some vectors using this neural network initialized only with a model
        self.assertEqual(ann_from_model.h_by_class(arrs[0]), 'false')
        self.assertEqual(ann_from_model.h_by_class(arrs[1]), 'true')
        x = [1.1, 0.9]
        self.assertEqual(ann_from_model.h_by_class(x), 'false')
         
        # Try pickling our model into a sister folder
        model_name = model.name
        directory = '../Ann-models'
        path_to_file = directory + '/' + model_name
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(path_to_file, 'wb') as f:
            pickle.dump(model, f)
         
        # Try unpickling our model
        with open(path_to_file, 'rb') as f:
            unpickled_model = pickle.load(f)
        # Load unpickled model and test
        ann_from_pickle = Ann(unpickled_model)
        # Evaluate some vectors using this neural network initialized only with a model
        self.assertEqual(ann_from_pickle.h_by_class(arrs[0]), 'false')
        self.assertEqual(ann_from_pickle.h_by_class(arrs[1]), 'true')
        x = [1.1, 0.9]
        self.assertEqual(ann_from_pickle.h_by_class(x), 'false')
    def test_7(self):
        # Learn some basic functions
        # Linearly-separable data-sets
         
        # function 1 (AND function) on 0 hidden layers
        arrs = []
        arrs.append([0, 0])
        arrs.append([0, 1])
        arrs.append([1, 0])
        arrs.append([1, 1])
        labels = []
        labels.append('false')
        labels.append('true')
        labels.append('true')
        labels.append('true') 
        ann = Ann(arrs, labels, n_h=0)
        ann.train()
        ann.validate_train()
        # Check to see if train_accuracy is over 90%
        self.assertTrue(ann.train_accuracy() > 0.9)
        # function 2 on 2 hidden layers
        arrs = []
        arrs.append([1, 1])
        arrs.append([2, 2])
        arrs.append([1, 3])
        arrs.append([2, 10])
        arrs.append([1, -1])
        arrs.append([-2, -2])
        arrs.append([1, -3])
        arrs.append([-2, -10])
        labels = []
        labels.append('false')
        labels.append('false')
        labels.append('false')
        labels.append('false')
        labels.append('true')
        labels.append('true')
        labels.append('true')
        labels.append('true') 
        ann = Ann(arrs, labels, n_h=2)
        ann.train()
        ann.validate_train()
        # Check to see if train_accuracy is over 90%
        self.assertTrue(ann.train_accuracy() > 0.9)
         
         
        # Non-linearly-separable data-sets
         
        
        # function 1 (XOR function) on 1 hidden layers
        arrs = []
        arrs.append([0, 0])
        arrs.append([0, 1])
        arrs.append([1, 0])
        arrs.append([1, 1])
        labels = []
        labels.append('false')
        labels.append('true')
        labels.append('true')
        labels.append('false') 
        ann = Ann(arrs, labels, n_h=1)
        ann.train(it=3000)
        ann.validate_train()
        # Check to see if train_accuracy is over 90%
        self.assertTrue(ann.train_accuracy() > 0.9)
         
        # function 1b (XOR function) on 1 hidden layers (with custom architecture)
        arrs = []
        arrs.append([0, 0])
        arrs.append([0, 1])
        arrs.append([1, 0])
        arrs.append([1, 1])
        labels = []
        labels.append('false')
        labels.append('true')
        labels.append('true')
        labels.append('false')
        s = [4, 5]  # Custom hidden layer architecture
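        # (presumably 4 units in the first hidden layer and 5 in the second, given n_h=len(s) below)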
        ann = Ann(arrs, labels, n_h=len(s), s=s)
        ann.train()
        ann.validate_train()
        # Check to see if train_accuracy is over 90%
        self.assertTrue(ann.train_accuracy() > 0.9)
             
 
        # function 1 (two nested sets) on 0 hidden layers
        arrs = []
        arrs.append([0, 0])
        arrs.append([0, 1])
        arrs.append([1, 1])
        arrs.append([1, 1])
        arrs.append([10, 0])
        arrs.append([0, 10])
        arrs.append([110, 10])
        arrs.append([-10, 10])
        labels = []
        labels.append('false')
        labels.append('false')
        labels.append('false')
        labels.append('false') 
        labels.append('true')
        labels.append('true')
        labels.append('true')
        labels.append('true') 
        ann = Ann(arrs, labels, n_h=0)
        ann.train()
        ann.validate_train()
        # Check to see if train_accuracy is over 90%
        self.assertTrue(ann.train_accuracy() > 0.9)
def demo_helper():
    init_logger('debug')
    
    print('\t** Learn the AND function using 0 hidden layers (logistic regression) **')
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'true']
    num_hidden_layers = 0
    ann = Ann(arrs, labels, n_h=num_hidden_layers)
    ann.train()
    if (ann.validate_train() == 1):
        print('\t** The AND function was learned correctly using 0 hidden layers **\n')
    else:
        print('\t** ERROR when learning the AND function using 0 hidden layers **\n')
    
    print('\t** Learn the AND function using 1 hidden layer **')
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'true']
    num_hidden_layers = 1
    ann = Ann(arrs, labels, n_h=num_hidden_layers)
    ann.train()
    if (ann.validate_train() == 1):
        print('\t** The AND function was learned correctly using 1 hidden layer **\n')
    else:
        print('\t** ERROR when learning the AND function using 1 hidden layer **\n')
        

    print('\t** Learn the XOR function using 0 hidden layers (logistic regression) **')
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'false']
    num_hidden_layers = 0
    ann = Ann(arrs, labels, n_h=num_hidden_layers)
    ann.train()
    if (ann.validate_train() != 1):
        print('\t** The XOR function was not learned correctly (as expected) because logistic regression (0 hidden layers)\n' +
              'cannot create a boundary through a non-linearly separable data-set (which the XOR function is) **\n')
    else:
        print('\t** ERROR: the XOR function was unexpectedly learned using 0 hidden layers **\n')
    
    print('\t** Learn the XOR function using 1 hidden layer **')
    arrs = [[0, 0], [0, 1], [1, 0], [1, 1]]
    labels = ['false', 'true', 'true', 'false']
    num_hidden_layers = 1
    ann = Ann(arrs, labels, n_h=num_hidden_layers)
    ann.train()
    if (ann.validate_train() == 1):
        print('\t** The XOR function was learned correctly using 1 hidden layer **\n')
    else:
        print('\t** ERROR when learning the XOR function using 1 hidden layer **\n')
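
# Assumed entry point (not shown in the original snippets): run the interactive demo when this
# module is executed directly; main() above could be called here instead.
if __name__ == '__main__':
    demo_helper()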