Пример #1
0
def test_constant_features():
    """Smoke test: the default learner trains on a single feature column.

    The lone column is +1 for the first ten samples (label 0) and -1 for the
    last ten (label 1).  The test passes as long as ``train`` does not raise.
    """
    n_samples = 20
    half = n_samples // 2

    features = np.ones(n_samples).reshape((-1, 1))
    features[half:] *= -1

    labels = np.zeros(n_samples)
    labels[half:] += 1

    milk.defaultclassifier().train(features, labels)
Пример #2
0
def trainMe(directory):
    """Train milk's default classifier on a directory tree of gesture images.

    ``directory`` must contain one sub-directory per gesture, each holding
    the image files of that gesture.  Every image is read with ``cv.imread``,
    resized to a width of 500 by ``resizeImage`` and converted into a feature
    vector by ``getFVector``.

    After training, an n-fold cross-validation is run on the same data and
    its accuracy (trace of the confusion matrix over its sum) is printed.

    Returns:
        ``(model, classes)`` -- the trained model and the list of gesture
        directory names.  Labels are 1-based: images of ``classes[i]`` are
        labelled ``i + 1``.
    """
    import os

    def visible_entries(path):
        # Mirrors a plain `ls` (no dotfiles, stable sorted order) without
        # spawning a process; names containing whitespace stay intact and
        # are text on both Python 2 and Python 3.
        return sorted(
            entry for entry in os.listdir(path) if not entry.startswith('.'))

    classes = []
    labels = []
    features = []
    print('[+] Reading files')
    gestures = visible_entries(directory)
    print('[+] Extracting features')
    for label, gesture in enumerate(gestures, 1):
        classes.append(gesture)
        gesture_dir = os.path.join(directory, gesture)
        for filename in visible_entries(gesture_dir):
            image = cv.imread(os.path.join(gesture_dir, filename))
            image = resizeImage(image, width=500)
            features.append(getFVector(image))
            # 1-based label: position of the gesture in `classes`, plus one.
            labels.append(label)
    features = np.array(features)
    labels = np.array(labels)

    print('[+] Training')
    classifier = milk.defaultclassifier()
    model = classifier.train(features, labels)

    print('[+] Cross validation')
    confusion_matrix, names = milk.nfoldcrossvalidation(
        features, labels, learner=classifier)
    correct = float(confusion_matrix.trace())
    print('[+] Accuracy %.2f' % (correct / float(confusion_matrix.sum())))

    return model, classes
def test_constant_features():
    """Check that training works when the only feature is constant per class.

    Ten samples labelled 0 carry the value +1 and ten samples labelled 1
    carry -1; the test only requires ``train`` to finish without raising.
    """
    labels = np.zeros(20)
    labels[10:] += 1
    # One column: +1.0 wherever the label is 0, -1.0 wherever it is 1.
    features = np.where(labels == 0, 1.0, -1.0).reshape((-1, 1))
    learner = milk.defaultclassifier()
    learner.train(features, labels)
Пример #4
0
def main():
    oauth_filename = os.environ.get('HOME', '') + os.sep + '.twitter_oauth'
    oauth_filename = os.path.expanduser(oauth_filename)

    oauth_token, oauth_token_secret = read_token_file(oauth_filename)
    auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY, CONSUMER_SECRET)
    twitter = Twitter(
        auth=auth,
        secure=True,
        api_version='1',
        domain='api.twitter.com'
    )

    try:
        tweets = pickle.load(open('tweets.pickle'))
    except:
        tweets = []
    print "Horay! I've got %s tweets from the file!" % len(tweets)

    # используем nltk
    featuresets = [(get_features(tweet), tweet['good']) for tweet in tweets]
    total = len(featuresets)
    train_set, test_set = featuresets[total/2:], featuresets[:total/2]

    classifier = nltk.NaiveBayesClassifier.train(train_set)
    #tree_classifier = nltk.DecisionTreeClassifier.train(train_set)
    print nltk.classify.accuracy(classifier, test_set)
    classifier.show_most_informative_features(10)
    #print nltk.classify.accuracy(tree_classifier, test_set)


    if MILK:
        # используем milk
        learner = milk.defaultclassifier()
        get_milk_keys(get_features(tweet) for tweet in tweets)
        features = [get_milk_features(tweet) for tweet in tweets]
        labels = [tweet['good'] for tweet in tweets]
        model = learner.train(features, labels)


    ids = set(tweet['id'] for tweet in tweets)

    tweet_iter = twitter.statuses.friends_timeline(count=COUNT)
    for tweet in tweet_iter:
        if tweet.get('text') and tweet['id'] not in ids:
            print '%s: %s' % (tweet['user']['name'], tweet['text'])
            print '[nltk] I think, this tweet is interesting with probability', classifier.prob_classify(get_features(tweet)).prob(True)
            if MILK:
                print '[milk] I think, this tweet is interesting with probability', model.apply(get_milk_features(tweet))
            good = raw_input('Interesting or not?\n(y/n): ') in ('y', 'Y', 'G', 'g')
            tweet['good'] = good
            tweets.append(tweet)



    pickle.dump(tweets, open('tweets.pickle', 'w'))
Пример #5
0
def main():
    oauth_filename = os.environ.get('HOME', '') + os.sep + '.twitter_oauth'
    oauth_filename = os.path.expanduser(oauth_filename)

    oauth_token, oauth_token_secret = read_token_file(oauth_filename)
    auth = OAuth(oauth_token, oauth_token_secret, CONSUMER_KEY,
                 CONSUMER_SECRET)
    twitter = Twitter(auth=auth,
                      secure=True,
                      api_version='1',
                      domain='api.twitter.com')

    try:
        tweets = pickle.load(open('tweets.pickle'))
    except:
        tweets = []
    print "Horay! I've got %s tweets from the file!" % len(tweets)

    # используем nltk
    featuresets = [(get_features(tweet), tweet['good']) for tweet in tweets]
    total = len(featuresets)
    train_set, test_set = featuresets[total / 2:], featuresets[:total / 2]

    classifier = nltk.NaiveBayesClassifier.train(train_set)
    #tree_classifier = nltk.DecisionTreeClassifier.train(train_set)
    print nltk.classify.accuracy(classifier, test_set)
    classifier.show_most_informative_features(10)
    #print nltk.classify.accuracy(tree_classifier, test_set)

    if MILK:
        # используем milk
        learner = milk.defaultclassifier()
        get_milk_keys(get_features(tweet) for tweet in tweets)
        features = [get_milk_features(tweet) for tweet in tweets]
        labels = [tweet['good'] for tweet in tweets]
        model = learner.train(features, labels)

    ids = set(tweet['id'] for tweet in tweets)

    tweet_iter = twitter.statuses.friends_timeline(count=COUNT)
    for tweet in tweet_iter:
        if tweet.get('text') and tweet['id'] not in ids:
            print '%s: %s' % (tweet['user']['name'], tweet['text'])
            print '[nltk] I think, this tweet is interesting with probability', classifier.prob_classify(
                get_features(tweet)).prob(True)
            if MILK:
                print '[milk] I think, this tweet is interesting with probability', model.apply(
                    get_milk_features(tweet))
            good = raw_input('Interesting or not?\n(y/n): ') in ('y', 'Y', 'G',
                                                                 'g')
            tweet['good'] = good
            tweets.append(tweet)

    pickle.dump(tweets, open('tweets.pickle', 'w'))
Пример #6
0
def train_ai():
		data = []
		classID = []
		features = []
		features_temp_array = []
		try: 
			data_filename = 'coinvision_feature_data.csv'
			print 'reading features and classID: ', data_filename
			f_handle = open(data_filename, 'r')
			reader = csv.reader(f_handle)
			#read data from file into arrays
			for row in reader:
				data.append(row)

			for row in range(0, len(data)):
				#print features[row][1]
				classID.append(int(data[row][0]))
				features_temp_array.append(data[row][1].split(" "))

			#removes ending element which is a space
			for x in range(len(features_temp_array)):
				features_temp_array[x].pop()
				features_temp_array[x].pop(0)

			#convert all strings in array to numbers
			temp_array = []
			for x in range(len(features_temp_array)):
				temp_array = [ float(s) for s in features_temp_array[x] ]
				features.append(temp_array)

			#make numpy arrays
			features = np.asarray(features)
			#print classID, features 
			learner = milk.defaultclassifier(mode='really-slow')
			model = learner.train(features, classID)
			pickle.dump( model, open( "coinvision_ai_model.mdl", "wb" ) )

		except:
			print "could not retrain.. bad file"
		'''
		from sklearn import svm
		model = svm.SVC(gamma=0.001, C=100.)
		model.fit(features, classID)
		pickle.dump( model, open( "coinvision_ai_model_svc.mdl", "wb" ) )

		from sklearn.neighbors import KNeighborsClassifier
		neigh = KNeighborsClassifier(n_neighbors=3)
		neigh.fit(features, classID)
		pickle.dump( model, open( "coinvision_ai_model_knn.mdl", "wb" ) )
		'''
		return 
Пример #7
0
def test_nov2010():
    """Regression test for a bug submitted by Mao Ziyang.

    This was failing in 0.3.5 because SDA selected no features.  Class 0 is
    standard-normal data scaled by 2, class 1 is unscaled; a fresh sample of
    each kind must be assigned to the matching class.
    """
    np.random.seed(222)
    n_per_class, n_features = 50, 20
    features = np.random.randn(2 * n_per_class, n_features)
    features[:n_per_class] *= 2
    labels = np.repeat((0, 1), n_per_class)

    model = milk.defaultclassifier().train(features, labels)

    # The samples are drawn one at a time, each immediately before its own
    # prediction, so the random stream is consumed in a fixed order.
    wide_label = model.apply(np.random.randn(n_features) * 2)
    narrow_label = model.apply(np.random.randn(n_features))
    assert wide_label == 0
    assert narrow_label == 1
Пример #8
0
def test_nov2010():
    """Regression test (bug submitted by Mao Ziyang).

    In 0.3.5 this failed because SDA selected no features.  The first 50
    rows (label 0) are random normal values multiplied by 2, the last 50
    rows (label 1) are left as drawn.
    """
    np.random.seed(222)
    scale = 2
    features = np.random.randn(100, 20)
    features[:50] *= scale
    labels = np.repeat((0, 1), 50)

    learner = milk.defaultclassifier()
    model = learner.train(features, labels)

    scaled_sample = np.random.randn(20) * scale
    label_for_scaled = model.apply(scaled_sample)
    plain_sample = np.random.randn(20)
    label_for_plain = model.apply(plain_sample)

    assert label_for_scaled == 0
    assert label_for_plain == 1
Пример #9
0
def train_ai():
    data = []
    classID = []
    features = []
    features_temp_array = []
    try:
        data_filename = 'robomow_feature_data.csv'
        print 'readind features and classID: ', data_filename
        f_handle = open(data_filename, 'r')
        reader = csv.reader(f_handle)
        #read data from file into arrays
        for row in reader:
            data.append(row)

        for row in range(0, len(data)):
            #print features[row][1]
            classID.append(int(data[row][0]))
            features_temp_array.append(data[row][1].split(" "))

        #removes ending element which is a space
        for x in range(len(features_temp_array)):
            features_temp_array[x].pop()
            features_temp_array[x].pop(0)

        #convert all strings in array to numbers
        temp_array = []
        for x in range(len(features_temp_array)):
            temp_array = [float(s) for s in features_temp_array[x]]
            features.append(temp_array)

        #make numpy arrays
        #features = np.asarray(features)
        print classID, features

        learner = milk.defaultclassifier()
        model = learner.train(features, classID)
        pickle.dump(model, open("robomow_ai_model.mdl", "wb"))
    except:
        print "could not retrain.. bad file"
        return
Пример #10
0
def train_ai():
	data = []
	classID = []
	features = []
	features_temp_array = []
	try: 
		data_filename = 'robomow_feature_data.csv'
		print 'readind features and classID: ', data_filename
		f_handle = open(data_filename, 'r')
		reader = csv.reader(f_handle)
		#read data from file into arrays
		for row in reader:
			data.append(row)

		for row in range(0, len(data)):
			#print features[row][1]
			classID.append(int(data[row][0]))
			features_temp_array.append(data[row][1].split(" "))

		#removes ending element which is a space
		for x in range(len(features_temp_array)):
			features_temp_array[x].pop()
			features_temp_array[x].pop(0)

		#convert all strings in array to numbers
		temp_array = []
		for x in range(len(features_temp_array)):
			temp_array = [ float(s) for s in features_temp_array[x] ]
			features.append(temp_array)

		#make numpy arrays
		#features = np.asarray(features)
		print classID, features 

		learner = milk.defaultclassifier()
		model = learner.train(features, classID)
		pickle.dump( model, open( "robomow_ai_model.mdl", "wb" ) )
	except:
		print "could not retrain.. bad file"
	return 
Пример #11
0
def learn_model(features, labels):
    """Train milk's default classifier on the given data and return the model."""
    return milk.defaultclassifier().train(features, labels)
    # NOTE(review): these indented lines look like the body of a per-image
    # loop whose header is not visible here; `img` and `Features` are defined
    # outside this fragment.
    features = []  # features for the current image (overwritten on the next line)
    features = list(mahotas.features.haralick(img).mean(0))  # Haralick features, averaged over axis 0
    features += list(Zernike(img))  # append the Zernike features
    features += (list(sift(img)))  # append the SIFT features
    Features.append(features)

# Apply kernel PCA for dimensionality reduction:
# every feature vector is projected onto 440 components (RBF kernel).
kpca = KernelPCA(n_components=440, kernel='rbf')
Features = kpca.fit_transform(Features)

labels = [1] * len(positives) + [0] * len(negatives)  # 1 for each positive, 0 for each negative

# Create milk's default learner; it is trained inside the cross-validation below.
learner = milk.defaultclassifier()
#model = learner.train(Features, labels)

start = timer()
# 10-fold cross-validation; also returns the per-sample predictions.
cm, names, preds = milk.nfoldcrossvalidation(Features,
                                             labels,
                                             nfolds=10,
                                             classifier=learner,
                                             return_predictions=True)
end = timer()
print(end - start)  # time taken by the cross-validation

# Cells of the confusion matrix.
# NOTE(review): the class names below come from the original comments; which
# index of `cm` maps to which class depends on `names` -- confirm.
TP = cm[0][0]  # class Spliced, predicted as Spliced
TN = cm[1][1]  # class Authentic, predicted as Authentic
FN = cm[0][1]  # class Spliced, predicted as Authentic
FP = cm[1][0]  # class Authentic, predicted as Spliced
'''
@author: patrick
'''
import csv
import milk
import numpy as np
import os

f = open('country_energy.csv', 'r')
csvrows = [r for r in csv.reader(f)]
print csvrows
frows = []
for r in csvrows:
   frows.append([float(x) for x in r[3:]])          
#frows = [ [float(x) for x in r[3:]] for r in csvrows]
print frows

features = np.vstack(frows)
labels = [r[2] for r in csvrows]

#confusion_matrix, names = milk.nfoldcrossvalidation(features, labels)
classifier = milk.defaultclassifier()
model = classifier.train(features, labels)
Пример #14
0
def train_ai():
			
			data = []
			classID = []
			features = []
			features_temp_array = []
			
			'''
			#SIMPLECV
			#bows
			feature_extractors = []
			extractor_names = []
			# Training data set paths for classification(suppervised learnning)
			image_dirs = ['../coin_images/jheads/',
						  '../coin_images/jtails/',
						  '../coin_images/oheads/',
						  '../coin_images/otails/',
						  ]
			# Different class labels for multi class classification
			class_names = ['jhead','jtail','ohead', 'otail']
			
			
			#preprocess all training images
			for directory in image_dirs:
				for filename in glob.glob(directory + '/*.jpg'):
					print "Processing:", filename
					img = cv2.imread(filename)
					temp_img  = preprocess_houghlines (img, 100)
					temp_str = filename.rsplit('/')
					temp_str = temp_str[len(temp_str)-1]
					temp_str = directory + '/temp/' + temp_str
					print temp_str
					cv2.imwrite(temp_str, temp_img)
					#raw_input('press enter to continue : ')
			#sys.exit(-1)
			
			
			#build array of directories for bow
			#image_dirs2 = []
			#for directory in image_dirs:
			#	image_dirs2.append(directory + '/temp/')
			#print image_dirs2

			# Different class labels for multi class classification
			extractor_name = 'hue'
			if extractor_name == 'bow':
				feature_extractor = BOFFeatureExtractor() # feature extrator for bag of words methodology
				feature_extractor.generate(image_dirs,imgs_per_dir=40) # code book generation
			elif extractor_name == 'hue':
				feature_extractor = HueHistogramFeatureExtractor()
			elif extractor_name == 'morphology':
				feature_extractor = MorphologyFeatureExtractor()
			elif extractor_name == 'haar':
				feature_extractor = HaarLikeFeatureExtractor()
			elif extractor_name == 'edge':
				feature_extractor = EdgeHistogramFeatureExtractor()
			image_dirs2 = image_dirs
			#bow_features = BOFFeatureExtractor()
			#bow_features.generate(image_dirs2,imgs_per_dir=40, verbose=True) # code book generation
			#bow_features.generate(image_dirs2,imgs_per_dir=200,numcodes=256,sz=(11,11),img_layout=(16,16),padding=4 )
			#bow_features.save('codebook.png','bow.txt')

			#print "extractor_names:", extractor_names, feature_extractors
			# initializing classifier with appropriate feature extractors list
			#print type(bow_features), bow_features, bow_features.getFieldNames(), bow_features.getNumFields()
			#raw_input('bow saved...Enter : ')
			#bow_features = None
			
			#bow_features = BOFFeatureExtractor()
			#print type(bow_features), bow_features, bow_features.getFieldNames(), bow_features.getNumFields()
			#bow_features.load('bow.txt')
			#print type(bow_features), bow_features, bow_features.getFieldNames(), bow_features.getNumFields()
			feature_extractors.append(feature_extractor)
			#raw_input('bow loaded Enter : ')

			#extractor_names.append(extractor_name)
			
			classifier_name = 'naive'
			if classifier_name == 'naive':
				classifier = NaiveBayesClassifier(feature_extractors)
			elif classifier_name == 'svm':
				classifier = SVMClassifier(feature_extractors)
			elif classifier_name == 'knn':
				classifier = KNNClassifier(feature_extractors, 2)
			elif classifier_name == 'tree':
				classifier = TreeClassifier(feature_extractors)

			# train the classifier to generate hypothesis function for classification
			#print "image_dirs:", image_dirs, class_names
			classifier.train(image_dirs2,class_names,disp=None,savedata='features.txt',verbose=True)
			
			print 'classifier:', type(classifier), classifier
			raw_input('press enter to continue :')
			#pickle.dump( classifier, open( "coinvision_ai_model2.mdl", "wb" ),2 )
			#classifier.save('coinvision_ai_model.mdl')
			print 'classifier:', type(classifier), classifier
			#classifier = NaiveBayesClassifier.load('coinvision_ai_model.mdl')

			#raw_input('press enter to continue : let me try loading bow file')
			#classifier2 = NaiveBayesClassifier.load('coinvision_ai_model.mdl')
			#classifier2.setFeatureExtractors(feature_extractors)
			#print 'classifier2:', type(classifier2), classifier2
			#classifier.load("coinvision_ai_model.mdl")
			#classifier2.load('coinvision_ai_model.mdl')
			#print 'classifier:', type(classifier2), classifier2
			raw_input('press enter to continue : ')
			print 'testing ai:'
			test_images_path = "../coin_images/unclassified"
			extension = "*.jpg"

			if not test_images_path:
				path = os.getcwd() #get the current directory
			else:
				path = test_images_path

			directory = os.path.join(path, extension)
			files = glob.glob(directory)

			count = 0 # counting the total number of training images
			error = 0 # conuting the total number of misclassification by the trained classifier
			for image_file in files:
				new_image = Image(image_file)
				category = classifier.classify(new_image)
				print "image_file:", image_file + "     classified as: " + category
				if image_file[-9] == 't':
					if category == 'jhead' or category == 'ohead':
						print "INCORRECT CLASSIFICATION"
						error += 1
				if image_file[-9] == 'h':
					if category == 'jtail' or category == 'otail':
						print "INCORRECT CLASSIFICATION"
						error += 1
				count += 1
			# reporting the results
			print ' * classifier : ', classifier
			print ' * extractors :', extractor_names
			print ' *', error, 'errors out of', count
			raw_input('edned press enter to continue : ')
			return
			'''
		#try: 
			data_filename = 'coinvision_feature_data.csv'
			print 'reading features and classID: ', data_filename
			f_handle = open(data_filename, 'r')
			reader = csv.reader(f_handle)
			#read data from file into arrays
			for row in reader:
				data.append(row)

			for row in range(0, len(data)):
				#print features[row][1]
				classID.append(int(data[row][0]))
				features_temp_array.append(data[row][1].split(" "))

			#removes ending element which is a space
			for x in range(len(features_temp_array)):
				features_temp_array[x].pop()
				features_temp_array[x].pop(0)

			#convert all strings in array to numbers
			temp_array = []
			for x in range(len(features_temp_array)):
				temp_array = [ float(s) for s in features_temp_array[x] ]
				features.append(temp_array)

			#make numpy arrays
			features = np.asarray(features)
			#print classID, features 

			
			learner = milk.defaultclassifier(mode='really-slow')
			model = learner.train(features, classID)
			pickle.dump( model, open( "coinvision_ai_model.mdl", "wb" ) )
			

		#except:
			print "could not retrain.. bad file"
			
			from sklearn import svm
			model = svm.SVC(gamma=0.001, C=100.)
			model.fit(features, classID)
			pickle.dump( model, open( "coinvision_ai_model_svc.mdl", "wb" ) )
			
			from sklearn.neighbors import KNeighborsClassifier
			neigh = KNeighborsClassifier(n_neighbors=3)
			neigh.fit(features, classID)
			pickle.dump( neigh, open( "coinvision_ai_model_knn.mdl", "wb" ) )
			
			from sklearn.svm import LinearSVC
			clf = LinearSVC()
			clf = clf.fit(features, classID)
			pickle.dump( clf, open( "coinvision_ai_model_lr.mdl", "wb" ) )
			
			from sklearn.linear_model import LogisticRegression
			clf2 = LogisticRegression().fit(features, classID)
			pickle.dump( clf2 , open( "coinvision_ai_model_clf2.mdl", "wb" ) )
		
			return 
Пример #15
0
def train_model(features_labels):
    """Train milk's default classifier on a ``(features, labels)`` pair.

    The pair is passed as ONE positional argument, exactly as before; it is
    unpacked in the body because tuple parameters in a signature are a
    syntax error on Python 3.

    Raises ``TypeError``/``ValueError`` if the argument is not a 2-item
    iterable.  Returns the trained model.
    """
    # Unpack first so a malformed argument fails before anything is imported.
    features, labels = features_labels
    import milk
    learner = milk.defaultclassifier()
    return learner.train(features, labels)
Пример #16
0
# Compare three classifiers on the same split.
# NOTE(review): `xtr`, `ytr`, `xts`, `classID` and `id_index` are defined
# before this fragment; presumably training features, training labels and
# test samples -- confirm against the code above.

# mlpy Pda: fit on the training data, then predict the test samples.
mypda = mlpy.Pda()
mypda.compute(xtr,ytr)
print "pda: ", mypda.predict(xts)
print "correct class:", classID[id_index]


# Debug output for the array shapes and dtypes (disabled):
#print np.shape(xtr), xtr.ndim, xtr.dtype
#print np.shape(ytr), ytr.ndim, ytr.dtype
#print np.shape(xts), xts.ndim, xts.dtype

# milk
# Help on function feature_selection_simple in module milk.supervised.defaultlearner:

#selector = feature_selection_simple()

# milk default classifier; only the first test sample is classified.
learner = milk.defaultclassifier(mode='medium', multi_strategy='1-vs-1', expanded=False)
model = learner.train(xtr, ytr)
print "milk: ", model.apply(xts[0])


# Persisting the milk model with joblib (disabled):
#from sklearn.externals import joblib
#joblib.dump(model, 'saved_model.pkl')
#model2 = joblib.load('saved_model.pkl')
#print "milk: ", model2.apply(xts[0])

# scikit-learn k-nearest neighbours with distance weighting.
# NOTE(review): `warn_on_equidistant` appears to be accepted only by old
# scikit-learn releases -- confirm against the installed version.
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(leaf_size=30, n_neighbors=5, p=2,
           warn_on_equidistant=True, weights='distance')

model3 = knn.fit(xtr, ytr)
print "knn sclearn: ", model3.predict(xts)
Пример #17
0
def learn_model(features, labels):
    """Return a model trained by milk's default classifier on the given data."""
    default_learner = milk.defaultclassifier()
    trained_model = default_learner.train(features, labels)
    return trained_model
def learn_model(features, labels):
    """Return a model trained by milk's default classifier in 'really-slow' mode."""
    thorough_learner = milk.defaultclassifier(mode='really-slow')
    trained_model = thorough_learner.train(features, labels)
    return trained_model