def test_constant_features():
    """Smoke test: defaultclassifier must train on a single-column feature matrix."""
    classifier = milk.defaultclassifier()
    # 20 samples, one feature each; the second half is negated so the two
    # label groups are separable on that single column.
    data = np.ones(20).reshape((-1, 1))
    target = np.zeros(20)
    target[10:] += 1
    data[10:] *= -1
    classifier.train(data, target)
def trainMe(directory):
    """Train a gesture classifier from a directory of per-gesture image folders.

    Each sub-directory of ``directory`` names one gesture class; every image
    inside it is resized to width 500 and converted to a feature vector via
    getFVector. Labels are 1-based indices into the returned class list.

    Returns:
        (model, classes): the trained milk model and the gesture-name list.
    """
    import os  # local import: keeps the (unseen) module header untouched

    classes = []
    labels = []
    features = []
    print('[+] Reading files')
    # os.listdir replaces shelling out to `ls` + .split(): portable, returns
    # str (not bytes) on Python 3, and safe for names containing whitespace.
    # sorted() preserves the alphabetical order `ls` produced.
    gestures = sorted(os.listdir(directory))
    print('[+] Extracting features')
    for gesture in gestures:
        classes.append(gesture)
        gesture_dir = os.path.join(directory, gesture)
        for filename in sorted(os.listdir(gesture_dir)):
            file_dir = os.path.join(gesture_dir, filename)
            labels.append(len(classes))  # 1-based class id
            image = cv.imread(file_dir)
            image = resizeImage(image, width=500)
            features.append(getFVector(image))
    features = np.array(features)
    labels = np.array(labels)
    print('[+] Training')
    classifier = milk.defaultclassifier()
    model = classifier.train(features, labels)
    print('[+] Cross validation')
    confusion_matrix, names = milk.nfoldcrossvalidation(features, labels, learner=classifier)
    # accuracy = trace (correct predictions) over the total number of samples
    print('[+] Accuracy %.2f' % (float(confusion_matrix.trace()) / float(confusion_matrix.sum())))
    return model, classes
def test_constant_features():
    """Regression smoke test: training survives a one-column feature matrix."""
    n = 20
    feats = np.ones(n).reshape((-1, 1))
    lbls = np.zeros(n)
    # flip the second half: labels become 1, features become -1
    lbls[n // 2:] += 1
    feats[n // 2:] *= -1
    learner = milk.defaultclassifier()
    learner.train(feats, lbls)
def main(): oauth_filename = os.environ.get('HOME', '') + os.sep + '.twitter_oauth' oauth_filename = os.path.expanduser(oauth_filename) oauth_token, oauth_token_secret = read_token_file(oauth_filename) auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY, CONSUMER_SECRET) twitter = Twitter( auth=auth, secure=True, api_version='1', domain='api.twitter.com' ) try: tweets = pickle.load(open('tweets.pickle')) except: tweets = [] print "Horay! I've got %s tweets from the file!" % len(tweets) # используем nltk featuresets = [(get_features(tweet), tweet['good']) for tweet in tweets] total = len(featuresets) train_set, test_set = featuresets[total/2:], featuresets[:total/2] classifier = nltk.NaiveBayesClassifier.train(train_set) #tree_classifier = nltk.DecisionTreeClassifier.train(train_set) print nltk.classify.accuracy(classifier, test_set) classifier.show_most_informative_features(10) #print nltk.classify.accuracy(tree_classifier, test_set) if MILK: # используем milk learner = milk.defaultclassifier() get_milk_keys(get_features(tweet) for tweet in tweets) features = [get_milk_features(tweet) for tweet in tweets] labels = [tweet['good'] for tweet in tweets] model = learner.train(features, labels) ids = set(tweet['id'] for tweet in tweets) tweet_iter = twitter.statuses.friends_timeline(count=COUNT) for tweet in tweet_iter: if tweet.get('text') and tweet['id'] not in ids: print '%s: %s' % (tweet['user']['name'], tweet['text']) print '[nltk] I think, this tweet is interesting with probability', classifier.prob_classify(get_features(tweet)).prob(True) if MILK: print '[milk] I think, this tweet is interesting with probability', model.apply(get_milk_features(tweet)) good = raw_input('Interesting or not?\n(y/n): ') in ('y', 'Y', 'G', 'g') tweet['good'] = good tweets.append(tweet) pickle.dump(tweets, open('tweets.pickle', 'w'))
def main(): oauth_filename = os.environ.get('HOME', '') + os.sep + '.twitter_oauth' oauth_filename = os.path.expanduser(oauth_filename) oauth_token, oauth_token_secret = read_token_file(oauth_filename) auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY, CONSUMER_SECRET) twitter = Twitter(auth=auth, secure=True, api_version='1', domain='api.twitter.com') try: tweets = pickle.load(open('tweets.pickle')) except: tweets = [] print "Horay! I've got %s tweets from the file!" % len(tweets) # используем nltk featuresets = [(get_features(tweet), tweet['good']) for tweet in tweets] total = len(featuresets) train_set, test_set = featuresets[total / 2:], featuresets[:total / 2] classifier = nltk.NaiveBayesClassifier.train(train_set) #tree_classifier = nltk.DecisionTreeClassifier.train(train_set) print nltk.classify.accuracy(classifier, test_set) classifier.show_most_informative_features(10) #print nltk.classify.accuracy(tree_classifier, test_set) if MILK: # используем milk learner = milk.defaultclassifier() get_milk_keys(get_features(tweet) for tweet in tweets) features = [get_milk_features(tweet) for tweet in tweets] labels = [tweet['good'] for tweet in tweets] model = learner.train(features, labels) ids = set(tweet['id'] for tweet in tweets) tweet_iter = twitter.statuses.friends_timeline(count=COUNT) for tweet in tweet_iter: if tweet.get('text') and tweet['id'] not in ids: print '%s: %s' % (tweet['user']['name'], tweet['text']) print '[nltk] I think, this tweet is interesting with probability', classifier.prob_classify( get_features(tweet)).prob(True) if MILK: print '[milk] I think, this tweet is interesting with probability', model.apply( get_milk_features(tweet)) good = raw_input('Interesting or not?\n(y/n): ') in ('y', 'Y', 'G', 'g') tweet['good'] = good tweets.append(tweet) pickle.dump(tweets, open('tweets.pickle', 'w'))
def train_ai(): data = [] classID = [] features = [] features_temp_array = [] try: data_filename = 'coinvision_feature_data.csv' print 'reading features and classID: ', data_filename f_handle = open(data_filename, 'r') reader = csv.reader(f_handle) #read data from file into arrays for row in reader: data.append(row) for row in range(0, len(data)): #print features[row][1] classID.append(int(data[row][0])) features_temp_array.append(data[row][1].split(" ")) #removes ending element which is a space for x in range(len(features_temp_array)): features_temp_array[x].pop() features_temp_array[x].pop(0) #convert all strings in array to numbers temp_array = [] for x in range(len(features_temp_array)): temp_array = [ float(s) for s in features_temp_array[x] ] features.append(temp_array) #make numpy arrays features = np.asarray(features) #print classID, features learner = milk.defaultclassifier(mode='really-slow') model = learner.train(features, classID) pickle.dump( model, open( "coinvision_ai_model.mdl", "wb" ) ) except: print "could not retrain.. bad file" ''' from sklearn import svm model = svm.SVC(gamma=0.001, C=100.) model.fit(features, classID) pickle.dump( model, open( "coinvision_ai_model_svc.mdl", "wb" ) ) from sklearn.neighbors import KNeighborsClassifier neigh = KNeighborsClassifier(n_neighbors=3) neigh.fit(features, classID) pickle.dump( model, open( "coinvision_ai_model_knn.mdl", "wb" ) ) ''' return
def test_nov2010():
    """Regression test for a 0.3.5 bug (submitted by Mao Ziyang):
    SDA feature selection used to discard every feature on this data."""
    np.random.seed(222)
    # two classes of 50 samples; class 0 has double the spread
    data = np.random.randn(100, 20)
    data[:50] *= 2
    target = np.repeat((0, 1), 50)
    model = milk.defaultclassifier().train(data, target)
    pred_wide = model.apply(np.random.randn(20) * 2)
    pred_narrow = model.apply(np.random.randn(20))
    assert pred_wide == 0
    assert pred_narrow == 1
def test_nov2010():
    """Regression check: milk 0.3.5's SDA selected zero features here.
    (Bug submitted by Mao Ziyang.)"""
    np.random.seed(222)
    n_per_class = 50
    X = np.random.randn(2 * n_per_class, 20)
    # widen class 0 so the classes are distinguishable by scale
    X[:n_per_class] *= 2
    y = np.repeat((0, 1), n_per_class)
    classifier = milk.defaultclassifier()
    model = classifier.train(X, y)
    first = model.apply(np.random.randn(20) * 2)
    second = model.apply(np.random.randn(20))
    assert first == 0
    assert second == 1
def train_ai(): data = [] classID = [] features = [] features_temp_array = [] try: data_filename = 'robomow_feature_data.csv' print 'readind features and classID: ', data_filename f_handle = open(data_filename, 'r') reader = csv.reader(f_handle) #read data from file into arrays for row in reader: data.append(row) for row in range(0, len(data)): #print features[row][1] classID.append(int(data[row][0])) features_temp_array.append(data[row][1].split(" ")) #removes ending element which is a space for x in range(len(features_temp_array)): features_temp_array[x].pop() features_temp_array[x].pop(0) #convert all strings in array to numbers temp_array = [] for x in range(len(features_temp_array)): temp_array = [float(s) for s in features_temp_array[x]] features.append(temp_array) #make numpy arrays #features = np.asarray(features) print classID, features learner = milk.defaultclassifier() model = learner.train(features, classID) pickle.dump(model, open("robomow_ai_model.mdl", "wb")) except: print "could not retrain.. bad file" return
def train_ai(): data = [] classID = [] features = [] features_temp_array = [] try: data_filename = 'robomow_feature_data.csv' print 'readind features and classID: ', data_filename f_handle = open(data_filename, 'r') reader = csv.reader(f_handle) #read data from file into arrays for row in reader: data.append(row) for row in range(0, len(data)): #print features[row][1] classID.append(int(data[row][0])) features_temp_array.append(data[row][1].split(" ")) #removes ending element which is a space for x in range(len(features_temp_array)): features_temp_array[x].pop() features_temp_array[x].pop(0) #convert all strings in array to numbers temp_array = [] for x in range(len(features_temp_array)): temp_array = [ float(s) for s in features_temp_array[x] ] features.append(temp_array) #make numpy arrays #features = np.asarray(features) print classID, features learner = milk.defaultclassifier() model = learner.train(features, classID) pickle.dump( model, open( "robomow_ai_model.mdl", "wb" ) ) except: print "could not retrain.. bad file" return
def learn_model(features, labels):
    """Fit milk's default classifier on (features, labels) and return the trained model."""
    return milk.defaultclassifier().train(features, labels)
features = [] # features for current image features = list(mahotas.features.haralick(img).mean(0)) #haralick features += list(Zernike(img)) #zernike features += (list(sift(img))) #sift Features.append(features) #Apply KPCA for feature reduction # will select best 440 features out of one image kpca = KernelPCA(n_components=440, kernel='rbf') Features = kpca.fit_transform(Features) labels = [1] * len(positives) + [0] * len(negatives) #Labels #creating SVM Classifier learner = milk.defaultclassifier() #model = learner.train(Features, labels) start = timer() cm, names, preds = milk.nfoldcrossvalidation(Features, labels, nfolds=10, classifier=learner, return_predictions=True) end = timer() print(end - start) #Time taken to perform validation TP = cm[0][0] # class Spliced, predicted as Spliced TN = cm[1][1] #class Authentic, predicted as Authentic FN = cm[0][1] #class Spliced, predicted as Authentic FP = cm[1][0] #class Authentic, predicted as Spliced
''' @author: patrick ''' import csv import milk import numpy as np import os f = open('country_energy.csv', 'r') csvrows = [r for r in csv.reader(f)] print csvrows frows = [] for r in csvrows: frows.append([float(x) for x in r[3:]]) #frows = [ [float(x) for x in r[3:]] for r in csvrows] print frows features = np.vstack(frows) labels = [r[2] for r in csvrows] #confusion_matrix, names = milk.nfoldcrossvalidation(features, labels) classifier = milk.defaultclassifier() model = classifier.train(features, labels)
def train_ai(): data = [] classID = [] features = [] features_temp_array = [] ''' #SIMPLECV #bows feature_extractors = [] extractor_names = [] # Training data set paths for classification(suppervised learnning) image_dirs = ['../coin_images/jheads/', '../coin_images/jtails/', '../coin_images/oheads/', '../coin_images/otails/', ] # Different class labels for multi class classification class_names = ['jhead','jtail','ohead', 'otail'] #preprocess all training images for directory in image_dirs: for filename in glob.glob(directory + '/*.jpg'): print "Processing:", filename img = cv2.imread(filename) temp_img = preprocess_houghlines (img, 100) temp_str = filename.rsplit('/') temp_str = temp_str[len(temp_str)-1] temp_str = directory + '/temp/' + temp_str print temp_str cv2.imwrite(temp_str, temp_img) #raw_input('press enter to continue : ') #sys.exit(-1) #build array of directories for bow #image_dirs2 = [] #for directory in image_dirs: # image_dirs2.append(directory + '/temp/') #print image_dirs2 # Different class labels for multi class classification extractor_name = 'hue' if extractor_name == 'bow': feature_extractor = BOFFeatureExtractor() # feature extrator for bag of words methodology feature_extractor.generate(image_dirs,imgs_per_dir=40) # code book generation elif extractor_name == 'hue': feature_extractor = HueHistogramFeatureExtractor() elif extractor_name == 'morphology': feature_extractor = MorphologyFeatureExtractor() elif extractor_name == 'haar': feature_extractor = HaarLikeFeatureExtractor() elif extractor_name == 'edge': feature_extractor = EdgeHistogramFeatureExtractor() image_dirs2 = image_dirs #bow_features = BOFFeatureExtractor() #bow_features.generate(image_dirs2,imgs_per_dir=40, verbose=True) # code book generation #bow_features.generate(image_dirs2,imgs_per_dir=200,numcodes=256,sz=(11,11),img_layout=(16,16),padding=4 ) #bow_features.save('codebook.png','bow.txt') #print "extractor_names:", extractor_names, feature_extractors # initializing 
classifier with appropriate feature extractors list #print type(bow_features), bow_features, bow_features.getFieldNames(), bow_features.getNumFields() #raw_input('bow saved...Enter : ') #bow_features = None #bow_features = BOFFeatureExtractor() #print type(bow_features), bow_features, bow_features.getFieldNames(), bow_features.getNumFields() #bow_features.load('bow.txt') #print type(bow_features), bow_features, bow_features.getFieldNames(), bow_features.getNumFields() feature_extractors.append(feature_extractor) #raw_input('bow loaded Enter : ') #extractor_names.append(extractor_name) classifier_name = 'naive' if classifier_name == 'naive': classifier = NaiveBayesClassifier(feature_extractors) elif classifier_name == 'svm': classifier = SVMClassifier(feature_extractors) elif classifier_name == 'knn': classifier = KNNClassifier(feature_extractors, 2) elif classifier_name == 'tree': classifier = TreeClassifier(feature_extractors) # train the classifier to generate hypothesis function for classification #print "image_dirs:", image_dirs, class_names classifier.train(image_dirs2,class_names,disp=None,savedata='features.txt',verbose=True) print 'classifier:', type(classifier), classifier raw_input('press enter to continue :') #pickle.dump( classifier, open( "coinvision_ai_model2.mdl", "wb" ),2 ) #classifier.save('coinvision_ai_model.mdl') print 'classifier:', type(classifier), classifier #classifier = NaiveBayesClassifier.load('coinvision_ai_model.mdl') #raw_input('press enter to continue : let me try loading bow file') #classifier2 = NaiveBayesClassifier.load('coinvision_ai_model.mdl') #classifier2.setFeatureExtractors(feature_extractors) #print 'classifier2:', type(classifier2), classifier2 #classifier.load("coinvision_ai_model.mdl") #classifier2.load('coinvision_ai_model.mdl') #print 'classifier:', type(classifier2), classifier2 raw_input('press enter to continue : ') print 'testing ai:' test_images_path = "../coin_images/unclassified" extension = "*.jpg" if not 
test_images_path: path = os.getcwd() #get the current directory else: path = test_images_path directory = os.path.join(path, extension) files = glob.glob(directory) count = 0 # counting the total number of training images error = 0 # conuting the total number of misclassification by the trained classifier for image_file in files: new_image = Image(image_file) category = classifier.classify(new_image) print "image_file:", image_file + " classified as: " + category if image_file[-9] == 't': if category == 'jhead' or category == 'ohead': print "INCORRECT CLASSIFICATION" error += 1 if image_file[-9] == 'h': if category == 'jtail' or category == 'otail': print "INCORRECT CLASSIFICATION" error += 1 count += 1 # reporting the results print ' * classifier : ', classifier print ' * extractors :', extractor_names print ' *', error, 'errors out of', count raw_input('edned press enter to continue : ') return ''' #try: data_filename = 'coinvision_feature_data.csv' print 'reading features and classID: ', data_filename f_handle = open(data_filename, 'r') reader = csv.reader(f_handle) #read data from file into arrays for row in reader: data.append(row) for row in range(0, len(data)): #print features[row][1] classID.append(int(data[row][0])) features_temp_array.append(data[row][1].split(" ")) #removes ending element which is a space for x in range(len(features_temp_array)): features_temp_array[x].pop() features_temp_array[x].pop(0) #convert all strings in array to numbers temp_array = [] for x in range(len(features_temp_array)): temp_array = [ float(s) for s in features_temp_array[x] ] features.append(temp_array) #make numpy arrays features = np.asarray(features) #print classID, features learner = milk.defaultclassifier(mode='really-slow') model = learner.train(features, classID) pickle.dump( model, open( "coinvision_ai_model.mdl", "wb" ) ) #except: print "could not retrain.. bad file" from sklearn import svm model = svm.SVC(gamma=0.001, C=100.) 
model.fit(features, classID) pickle.dump( model, open( "coinvision_ai_model_svc.mdl", "wb" ) ) from sklearn.neighbors import KNeighborsClassifier neigh = KNeighborsClassifier(n_neighbors=3) neigh.fit(features, classID) pickle.dump( neigh, open( "coinvision_ai_model_knn.mdl", "wb" ) ) from sklearn.svm import LinearSVC clf = LinearSVC() clf = clf.fit(features, classID) pickle.dump( clf, open( "coinvision_ai_model_lr.mdl", "wb" ) ) from sklearn.linear_model import LogisticRegression clf2 = LogisticRegression().fit(features, classID) pickle.dump( clf2 , open( "coinvision_ai_model_clf2.mdl", "wb" ) ) return
def train_model(features_labels):
    """Train milk's default classifier and return the fitted model.

    Accepts a single (features, labels) tuple -- the calling convention is
    unchanged (e.g. suitable for map/pool.map over argument tuples).

    Fix: the original used Python 2 tuple-parameter syntax
    ``def train_model((features, labels))``, which was removed in Python 3
    (PEP 3113); unpacking inside the body works on both versions.
    """
    features, labels = features_labels
    import milk
    learner = milk.defaultclassifier()
    return learner.train(features, labels)
# NOTE(review): this chunk uses xtr/ytr/xts/classID/id_index defined earlier
# in the file -- presumably a train/test split over the feature data; confirm
# against the surrounding code. It compares three classifiers (mlpy PDA,
# milk, sklearn kNN) on the same split.
# mlpy Penalized Discriminant Analysis baseline
mypda = mlpy.Pda()
mypda.compute(xtr,ytr)
print "pda: ", mypda.predict(xts)
print "correct class:", classID[id_index]
#print np.shape(xtr), xtr.ndim, xtr.dtype
#print np.shape(ytr), ytr.ndim, ytr.dtype
#print np.shape(xts), xts.ndim, xts.dtype
#milk
#Help on function feature_selection_simple in module milk.supervised.defaultlearner:
#selector = feature_selection_simple()
learner = milk.defaultclassifier(mode='medium', multi_strategy='1-vs-1', expanded=False)
model = learner.train(xtr, ytr)
# milk scores a single sample, hence xts[0]
print "milk: ", model.apply(xts[0])
#from sklearn.externals import joblib
#joblib.dump(model, 'saved_model.pkl')
#model2 = joblib.load('saved_model.pkl')
#print "milk: ", model2.apply(xts[0])
from sklearn.neighbors import KNeighborsClassifier
# NOTE(review): warn_on_equidistant was removed from scikit-learn long ago;
# this call only works against an old pinned sklearn -- verify the version.
knn = KNeighborsClassifier(leaf_size=30, n_neighbors=5, p=2, warn_on_equidistant=True, weights='distance')
model3 = knn.fit(xtr, ytr)
print "knn sclearn: ", model3.predict(xts)
def learn_model(features, labels):
    """Train milk's default classifier with mode='really-slow' and return the model."""
    classifier = milk.defaultclassifier(mode='really-slow')
    model = classifier.train(features, labels)
    return model