def main():
    """Train the SVM classifier, then answer classification requests over TCP.

    The server binds host '' on port 9999.  Every chunk of data received
    from a connected client is passed to ``hybrid_classify`` and the result
    is sent back followed by CRLF.  Reading a line from standard input stops
    the server.  The training data file is taken from ``sys.argv[1]``.

    All sockets (the listening socket and every accepted client) are closed
    on the way out, including when an exception ends the loop.
    """
    global modifiers, booster_map, negator_map, bag_of_words
    global svm_classifier

    # Creating the socket
    host = ''
    port = 9999
    backlog = 5
    size = 1024
    server = socket.socket(socket.AF_INET, type=socket.SOCK_STREAM, proto=0)
    # Disabled in the original:
    # server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    # server.setblocking(0)

    # Everything select() watches for readability: the listening socket,
    # standard input and, later on, every accepted client.
    watched = [server, sys.stdin]
    try:
        server.bind((host, port))
        server.listen(backlog)

        # Loading the dictionaries and training data
        get_pol_map()
        booster_map = get_booster_map()
        negator_map = get_negator_map()
        modifiers = get_mod_map(booster_map, negator_map)
        bag_of_words = []
        train_data = load_data_from_file(sys.argv[1])

        # Initializing and training the classifier
        svm_classifier = SVM(type=CLASSIFICATION, kernel=LINEAR)
        train_svm(svm_classifier, train_data)
        print("Training Completed......")

        running = True
        while running:
            readable, _, _ = select.select(watched, [], [])
            for s in readable:
                if s is server:
                    # handle the server socket
                    client, _address = server.accept()
                    watched.append(client)
                elif s is sys.stdin:
                    # handle standard input: any line stops the server
                    sys.stdin.readline()
                    running = False
                else:
                    # handle all other sockets
                    try:
                        data = s.recv(size)
                    except socket.error:
                        # A reset connection is treated like a disconnect
                        # instead of taking the whole server down.
                        data = ''
                    if data:
                        reply = hybrid_classify(str(data).strip("\n")) + '\r\n'
                        try:
                            # sendall() keeps writing until the whole reply
                            # is out; send() may stop after a partial write.
                            s.sendall(reply)
                        except socket.error:
                            data = ''
                    if not data:
                        s.close()
                        watched.remove(s)
    finally:
        # Release the listening socket and any client still connected.
        for s in watched:
            if s is not sys.stdin:
                s.close()
def __init__(self, trainset=None):
    """Create a linear SVM classifier and train it on *trainset*.

    Args:
        trainset: training data handed unchanged to ``self.train``.
            Defaults to a new empty list on every call.
    """
    # A ``[]`` default would be one list object shared by every instance
    # created without an argument; build a fresh one per call instead.
    if trainset is None:
        trainset = []
    # initializes a SVM classifier
    self.classifier = SVM(type=CLASSIFICATION, kernel=LINEAR)
    self.bag_of_words = []
    # NOTE(review): presumably enables probability estimates on the
    # underlying SVM - confirm against the SVM class.
    self.classifier.probability = True
    self.train(self.classifier, trainset)
def main():
    """Train the SVM classifier and serve tweet polarities over a TCP socket.

    The server binds host '' on port 9999.  Each chunk received from a
    client is classified with ``hybrid_classify`` and the resulting polarity
    is sent back followed by CRLF.  Entering a line on the console stops the
    server.  The training data file is taken from ``sys.argv[1]``.

    The listening socket and every client socket are closed on exit, also
    when an exception interrupts the loop.
    """
    global modifiers, booster_map, negator_map, bag_of_words
    global svm_classifier

    # Creating the socket so that it can receive a tweet and send the
    # resulting polarity
    host = ''
    port = 9999
    backlog = 5
    size = 1024
    server = socket.socket(socket.AF_INET, type=socket.SOCK_STREAM, proto=0)

    # Sources select() waits on: the listening socket, the console and,
    # once they connect, the client sockets.
    sources = [server, sys.stdin]
    try:
        server.bind((host, port))
        server.listen(backlog)

        # Loading the dictionaries and training data
        get_pol_map()
        booster_map = get_booster_map()
        negator_map = get_negator_map()
        modifiers = get_mod_map(booster_map, negator_map)
        bag_of_words = []
        train_data = load_data_from_file(sys.argv[1])

        # Initializing and training the classifier
        svm_classifier = SVM(type=CLASSIFICATION, kernel=LINEAR)
        train_svm(svm_classifier, train_data)
        print("Training Completed......")

        # Loop that keeps the classifier running
        running = True
        while running:
            ready, _, _ = select.select(sources, [], [])
            # An input is detected
            for s in ready:
                if s is server:
                    # A client is requesting a connection
                    client, _address = server.accept()
                    sources.append(client)
                elif s is sys.stdin:
                    # Text entered on the console stops the server
                    sys.stdin.readline()
                    running = False
                else:
                    # Any other socket is a client sending a tweet
                    try:
                        data = s.recv(size)
                    except socket.error:
                        # A broken connection counts as a disconnect rather
                        # than crashing the server for everybody else.
                        data = ''
                    if data:
                        # Classify the tweet and send the resulting polarity
                        polarity = hybrid_classify(str(data).strip("\n"))
                        try:
                            # sendall() retries until the whole reply has
                            # been written; send() may write only a part.
                            s.sendall(polarity + '\r\n')
                        except socket.error:
                            data = ''
                    if not data:
                        # The client disconnected: close its socket and
                        # stop watching it
                        s.close()
                        sources.remove(s)
    finally:
        # Close the listening socket and every client still connected.
        for s in sources:
            if s is not sys.stdin:
                s.close()
def normal_test(data, type):
    """Train several classifiers on one part of *data* and test on the next.

    The first ``int(round(len(data)/2))`` items are used for training and
    the following slice of the same length for testing.  For every
    classifier the accuracy, precision, recall, F1-score and the time spent
    in ``test()`` are printed.

    Args:
        data: sliceable collection of training/test items.
        type: label inserted into the progress messages.
    """
    print('----------------------------------------------------')
    print('TEST FUNCTION STARTED FOR ' + type + '!')

    total_data_size = len(data)
    training_size = int(round(total_data_size/2))
    test_size = training_size

    for caption, amount in (('Total Size', total_data_size),
                            ('Training Size', training_size),
                            ('Test Size', test_size)):
        print(caption + ': ' + str(amount))

    print('Training Started for ' + type + '!')

    def training_part():
        # Every classifier receives its own slice of the training items.
        return data[:training_size]

    # Remove or comment out entries to limit which algorithms are tested.
    classification_methods = {
        'NB': NB(train=training_part(), baseline=MAJORITY, method=MULTINOMIAL),
        'KNN2': KNN(train=training_part(), baseline=MAJORITY, k=2, distance=COSINE),
        'KNN3': KNN(train=training_part(), baseline=MAJORITY, k=3, distance=COSINE),
        'KNN4': KNN(train=training_part(), baseline=MAJORITY, k=4, distance=COSINE),
        'KNN5': KNN(train=training_part(), baseline=MAJORITY, k=5, distance=COSINE),
        'KNN6': KNN(train=training_part(), baseline=MAJORITY, k=6, distance=COSINE),
        'KNN7': KNN(train=training_part(), baseline=MAJORITY, k=7, distance=COSINE),
        'KNN8': KNN(train=training_part(), baseline=MAJORITY, k=8, distance=COSINE),
        'KNN9': KNN(train=training_part(), baseline=MAJORITY, k=9, distance=COSINE),
        'KNN10': KNN(train=training_part(), baseline=MAJORITY, k=10, distance=COSINE),
        'SLP1': SLP(train=training_part(), baseline=MAJORITY, iterations=1),
        'SLP2': SLP(train=training_part(), baseline=MAJORITY, iterations=2),
        'SLP3': SLP(train=training_part(), baseline=MAJORITY, iterations=3),
        'SVM': SVM(train=training_part(), type=CLASSIFICATION, kernel=POLYNOMIAL),
    }

    print('Normal Testing Started!')
    for name, classifier in classification_methods.items():
        # Only the call to test() is timed.
        began = timeit.default_timer()
        accuracy, precision, recall, f1 = classifier.test(
            data[training_size:training_size+test_size])
        elapsed = timeit.default_timer() - began

        print('*' + name + '*')
        for caption, score in (('Accuracy', accuracy),
                               ('Precision', precision),
                               ('Recall', recall),
                               ('F1-score', f1),
                               ('Time', elapsed)):
            print(caption + ': ' + str(score))
        print('')
def _learner(self):
    """Return a new ``SVM`` created with ``extension='libsvm'``."""
    learner = SVM(extension='libsvm')
    return learner
from pattern.web import Twitter from pattern.text.en import tag from pattern.vector import KNN, count, NaiveBayes, SVM import os, random import file_io as fio corp_dir = 'essays/original' twitter, knn, nbayes, svm = Twitter(), KNN(), NaiveBayes(), SVM() from nltk.corpus import stopwords import lsa cachedStopWords = stopwords.words("english") testSet = [] def naive(): trainingSet = [] l = lsa.getMod() dirs = [x[0] for x in os.walk(os.path.abspath(corp_dir))] for dir in dirs: label = 0 if 'low' in dir: label = -1 elif 'high' in dir: label = 1 tfiles = [] tfiles = fio.getTopLevelFiles(dir, extension='txt') train_smpl = [] if len(tfiles) > 0: train_smpl = [ tfiles[i] for i in random.sample(xrange(len(tfiles)), 13) ] for file in tfiles:
# The separation is going to be a rough approximation, obviously.
#
# Now imagine the following game:
# - The room is filled with static, floating red and blue marbles.
# - It is your task to separate them by inserting a glass panel between them.
#
# The 3-D space gives a lot more options. Adding more dimensions adds even
# more options. This is roughly what an SVM does, using kernel functions to
# push the separation to a higher dimension.

# Pattern includes precompiled C binaries of libsvm.
# If these do not work on your system you have to compile libsvm manually.
# You can also replace the "SVM()" statement below with "KNN()",
# so you can still follow the rest of the example.
classifier = SVM()

# We'll build a classifier to predict sentiment in Dutch movie reviews.
# For example, "geweldige film!" (great movie) indicates a positive sentiment.
# The CSV file at pattern/test/corpora/polarity-nl-bol.com.csv
# contains 1,500 positive and 1,500 negative reviews.

# The pattern.vector module has a shuffled() function,
# which we use to randomly arrange the reviews in the list:
print("loading data...")
data = shuffled(Datasheet.load(
    os.path.join("..", "..", "test", "corpora", "polarity-nl-bol.com.csv")))

# We do not necessarily need Document objects as in the previous examples.
# #train for category
# slp = SLP(train=data[:len(data)], baseline=MAJORITY, iterations=3)
# slp.finalize()
# #save
# slp.save(f, True)

# print '--------------------'

# Training for rating (rating_nlp): gather the music, movie and game
# reviews, shuffle them and train a polynomial-kernel SVM on all of them.
f = os.path.join(os.path.dirname(__file__), "classifiers/rating_nlp.svm")
data = []
for category in ('musics', 'movies', 'games'):
    data.extend(asDocumentReviewNLP(classification_data[category]['reviews']))
shuffle(data)
# The classifier is trained on a copy of the complete, shuffled list.
svm = SVM(train=list(data), type=CLASSIFICATION, kernel=POLYNOMIAL)
svm.finalize()
# Save the trained classifier to the path built above.
svm.save(f, True)

# print '--------------------'

# #training for sentiment
# f = os.path.join(os.path.dirname(__file__), "classifiers/sentiment.nb")
# data = []
# data.extend(asDocumentReviewNLP(classification_data['musics']['reviews']))
# data.extend(asDocumentReviewNLP(classification_data['movies']['reviews']))
# data.extend(asSentiment(classification_data['games']['reviews']))
# shuffle(data)
# nb = NB(train=data[:len(data)], baseline=MAJORITY, method=MULTINOMIAL, alpha=0.0001)
# nb.finalize()
# #save
""" Returns a bag-of-words vector for the given string. """ v = {} v.update(count(words(s))) return v train = (("cat", "A cat has whiskers"), ("cat", "A cat says meow"), ("cat", "the animal was purring softly"), ("dog", "A dog is an animal that says woof"), ("dog", "Why is that dog still barking?"), ("dog", "He happily wagged his tail")) # A robust, all-round classification algorithm is SVM. # If SVM doesn't work on your machine, use SLP (= simple neural net). classifier = SVM() for name, s in train: classifier.train(v(s), type=name) print classifier.classify(v("the animal is purring and meowing")) print classifier.classify(v("woof!")) print # ------------------------------------------------------------------------------------ # Vectors can be constructed in many different ways; # what features you include will influence how accurate the classifier is. # For example, in the example above there is no way to match "barking" to "bark" # (for the classifier they are different words). # A good strategy is to use character n-grams as features: # sequences of n successive characters (usually n=3).
def main():
    """Train the SVM classifier and report results on the two test sets.

    Command-line arguments 1 to 3 are handed to ``load_data_from_file``,
    whose three results are used as the training data, the TASS test data
    and the crowd test data, in that order.  After training, each test set
    is classified with ``hybrid_classify`` and a report is printed.
    """
    global modifiers, booster_map, negator_map, bag_of_words
    global svm_classifier

    get_pol_map()
    booster_map = get_booster_map()
    negator_map = get_negator_map()
    modifiers = get_mod_map(booster_map, negator_map)
    bag_of_words = []
    train_data, test_data, crowd_data = load_data_from_file(
        sys.argv[1], sys.argv[2], sys.argv[3])

    svm_classifier = SVM(type=CLASSIFICATION, kernel=LINEAR)
    train_svm(svm_classifier, train_data)
    print("Training Completed......")

    _report_results(test_data, "TASS Test Results......")
    _report_results(crowd_data, "Crowd Test Results......")


def _report_results(tweets, title):
    """Classify every tweet in *tweets* and print a report headed by *title*.

    Each tweet is a mapping with a 'message' and a 'sentiment' entry.  The
    module-level layer counters EMO, BOW and AI are reset to 0 before the
    run and printed afterwards.
    """
    global EMO, BOW, AI
    EMO = 0
    BOW = 0
    AI = 0

    hits = 0.0
    misses = 0.0
    confusion = {}
    for tweet in tweets:
        predicted = hybrid_classify(tweet['message'])
        expected = tweet['sentiment']
        if predicted == expected:
            hits += 1
        else:
            misses += 1
        pair = (predicted, expected)
        confusion[pair] = confusion.get(pair, 0) + 1

    counter = hits + misses
    if counter:
        accuracy = hits / counter
        # NOTE(review): every tweet is counted as a hit or a miss, so this
        # ratio is always 1.0.  It is kept as originally written - confirm
        # which recall definition is intended.
        recall = (hits + misses) / counter
        f1 = (2 * accuracy * recall) / (accuracy + recall)
    else:
        # An empty data set used to raise ZeroDivisionError; report zeros.
        accuracy = recall = f1 = 0.0

    # Each line is built as one string so the spacing matches what the
    # Python 2 print statement with comma-separated items produced.
    print("")
    print(title)
    print("Accuracy:  " + str(accuracy))
    print("Recall:  " + str(recall))
    print("F1-Score:  " + str(f1))
    print("Layer Summary:")
    print("Emoticon Layer:  " + str(EMO))
    print("BOW Layer:  " + str(BOW))
    print("SVM Layer:  " + str(AI))
    print("Confussion Matrix:")
    for pair, count in confusion.items():
        print(str(pair) + " \t" + str(count))