def test_load_fake_lfw_people():
    """Check the fake LFW people fixture, filtered and unfiltered."""
    lfw_people = load_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                 min_faces_per_person=3)

    # The data is cropped around the center as a rectangular bounding box
    # around the face.  Colors are converted to gray levels:
    assert_equal(lfw_people.data.shape, (10, 62, 47))

    # the target is an array of person integer ids
    assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2])

    # names of the persons can be found using the target_names array
    expected_classes = ['Abdelatif Smith', 'Abhati Kepler', 'Onur Lopez']
    assert_array_equal(lfw_people.target_names, expected_classes)

    # It is possible to ask for the original data without any cropping or
    # color conversion, and without limiting the number of pictures per
    # person
    lfw_people = load_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                 resize=None, slice_=None, color=True)
    assert_equal(lfw_people.data.shape, (17, 250, 250, 3))

    # without the min_faces_per_person filter, all 17 pictures of all
    # 7 persons of the fixture are present
    assert_array_equal(lfw_people.target,
                       [0, 0, 1, 6, 5, 6, 3, 6, 0, 3, 6, 1, 2, 4, 5, 1, 2])
    assert_array_equal(lfw_people.target_names,
                       ['Abdelatif Smith', 'Abhati Kepler', 'Camara Alvaro',
                        'Chen Dupont', 'John Lee', 'Lin Bauman',
                        'Onur Lopez'])
def test_load_fake_lfw_people():
    """Check the fake LFW people fixture, in gray and color variants."""
    lfw_people = load_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                 min_faces_per_person=3)

    # The data is cropped around the center as a rectangular bounding box
    # around the face.  Colors are converted to gray levels:
    assert_equal(lfw_people.data.shape, (10, 62, 47))

    # the target is an array of person integer ids
    assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2])

    # names of the persons can be found using the target_names array
    expected_classes = ['Abdelatif Smith', 'Abhati Kepler', 'Onur Lopez']
    assert_array_equal(lfw_people.target_names, expected_classes)

    # It is possible to ask for the original data without any cropping or
    # color conversion
    lfw_people = load_lfw_people(data_home=SCIKIT_LEARN_DATA,
                                 min_faces_per_person=3,
                                 resize=None, slice_=None, color=True)
    assert_equal(lfw_people.data.shape, (10, 250, 250, 3))

    # the ids and class names are the same as previously
    assert_array_equal(lfw_people.target, [2, 0, 1, 0, 2, 0, 2, 1, 1, 2])
    assert_array_equal(lfw_people.target_names, expected_classes)
def test_load_lfw_people():
    """Exercise the real LFW people dataset, if locally available."""
    if not os.path.exists(os.path.join(get_data_home(), 'lfw_home')):
        # skip this test if the data has not already been previously
        # downloaded, to avoid having tests rely on the availability of a
        # fast internet connection.  To download the data, run the face
        # recognition / verification examples or call the fetch_lfw_people
        # function from an interactive shell for instance.
        raise SkipTest

    lfw_people = load_lfw_people(min_faces_per_person=100)

    # only 5 persons have more than 100 pictures each in the dataset
    top_classes = [
        'Colin Powell',
        'Donald Rumsfeld',
        'George W Bush',
        'Gerhard Schroeder',
        'Tony Blair',
    ]
    assert_array_equal(lfw_people.target_names, top_classes)

    # the default slice is a rectangular shape around the face, removing
    # most of the background
    assert_equal(lfw_people.data.shape, (1140, 62, 47))

    # person ids have been shuffled to avoid having the photos ordered by
    # alphabetical ordering as in the default tarball layout
    assert_equal(lfw_people.target.shape, (1140,))
    assert_array_equal(lfw_people.target[:5], [2, 3, 1, 4, 1])

    # it is possible to slice the data in different ways and to resize the
    # output without changing the width / height ratio
    lfw_people = load_lfw_people(min_faces_per_person=100,
                                 slice_=(slice(50, 200), slice(50, 200)),
                                 resize=0.1)
    assert_equal(lfw_people.data.shape, (1140, 15, 15))

    # it is also possible to load the color version of the data, in which
    # case the color channels are stored in the last dimension of the data
    lfw_people = load_lfw_people(min_faces_per_person=100, color=True)
    assert_equal(lfw_people.data.shape, (1140, 62, 47, 3))
def test_load_lfw_people():
    """Load the real LFW people data in several configurations."""
    if not os.path.exists(os.path.join(get_data_home(), 'lfw_home')):
        # skip this test if the data has not already been previously
        # downloaded, to avoid having tests rely on the availability of a
        # fast internet connection.  To download the data, run the face
        # recognition / verification examples or call the fetch_lfw_people
        # function from an interactive shell for instance.
        raise SkipTest

    lfw_people = load_lfw_people(min_faces_per_person=100)

    # only 5 persons have more than 100 pictures each in the dataset
    top_classes = ['Colin Powell', 'Donald Rumsfeld', 'George W Bush',
                   'Gerhard Schroeder', 'Tony Blair']
    assert_array_equal(lfw_people.target_names, top_classes)

    # the default slice is a rectangular shape around the face, removing
    # most of the background
    assert_equal(lfw_people.data.shape, (1140, 62, 47))

    # person ids have been shuffled to avoid having the photos ordered by
    # alphabetical ordering as in the default tarball layout
    assert_equal(lfw_people.target.shape, (1140,))
    assert_array_equal(lfw_people.target[:5], [2, 3, 1, 4, 1])

    # it is possible to slice the data in different ways and to resize the
    # output without changing the width / height ratio
    lfw_people = load_lfw_people(min_faces_per_person=100,
                                 slice_=(slice(50, 200), slice(50, 200)),
                                 resize=0.1)
    assert_equal(lfw_people.data.shape, (1140, 15, 15))

    # it is also possible to load the color version of the data, in which
    # case the color channels are stored in the last dimension of the data
    lfw_people = load_lfw_people(min_faces_per_person=100, color=True)
    assert_equal(lfw_people.data.shape, (1140, 62, 47, 3))
def test_load_fake_lfw_people_too_restrictive():
    """Load the fake LFW data with a filter no fixture person satisfies."""
    # NOTE(review): as written this only checks the call completes; if the
    # loader is expected to raise for an over-restrictive
    # min_faces_per_person, an assertion (or a @raises decorator) is
    # presumably missing -- confirm against the loader's contract.
    load_lfw_people(data_home=SCIKIT_LEARN_DATA, min_faces_per_person=100)
def test_load_empty_lfw_people():
    """Load the LFW people data from a data home with no LFW archives.

    The return value is intentionally discarded: the test only checks that
    the call itself behaves consistently on an empty data folder.
    """
    # the original bound the result to an unused local; drop the binding
    load_lfw_people(data_home=SCIKIT_LEARN_EMPTY_DATA)
from scikits.learn.cross_val import StratifiedKFold from scikits.learn.datasets import load_lfw_people from scikits.learn.grid_search import GridSearchCV from scikits.learn.metrics import classification_report from scikits.learn.metrics import confusion_matrix from scikits.learn.pca import RandomizedPCA from scikits.learn.svm import SVC # Display progress logs on stdout logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') ################################################################################ # Download the data, if not already on disk and load it as numpy arrays lfw_people = load_lfw_people(min_faces_per_person=70, resize=0.4) # reshape the data using the traditional (n_samples, n_features) shape faces = lfw_people.data n_samples, h, w = faces.shape X = faces.reshape((n_samples, h * w)) n_features = X.shape[1] # the label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0] print "Total dataset size:" print "n_samples: %d" % n_samples
from scikits.learn.metrics import classification_report from scikits.learn.metrics import confusion_matrix from scikits.learn.pca import RandomizedPCA from scikits.learn.svm import SVC # Display progress logs on stdout logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') ################################################################################ # Download the data, if not already on disk and load it as numpy arrays download_if_missing = '--download' in sys.argv try: lfw_people = load_lfw_people(min_faces_per_person=70, resize=0.4, download_if_missing=download_if_missing) except IOError: print "This example needs more than 200MB of data not locally available:" print "re-run this script with '--download' to download it explicitly" print sys.exit(0) # reshape the data using the traditional (n_samples, n_features) shape faces = lfw_people.data n_samples, h, w = faces.shape X = faces.reshape((n_samples, h * w)) n_features = X.shape[1] # the label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names
from scikits.learn.datasets import load_lfw_people from scikits.learn.grid_search import GridSearchCV from scikits.learn.metrics import classification_report from scikits.learn.metrics import confusion_matrix from scikits.learn.pca import RandomizedPCA from scikits.learn.svm import SVC # Display progress logs on stdout logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') ################################################################################ # Download the data, if not already on disk and load it as numpy arrays lfw_people = load_lfw_people(min_faces_per_person=70, resize=0.4) # reshape the data using the traditional (n_samples, n_features) shape faces = lfw_people.data n_samples, h, w = faces.shape X = faces.reshape((n_samples, h * w)) n_features = X.shape[1] # the label to predict is the id of the person y = lfw_people.target target_names = lfw_people.target_names n_classes = target_names.shape[0] print "Total dataset size:" print "n_samples: %d" % n_samples