Example #1
 def extractFeatures(self, train_data, test_data):
     # Construct Feature Extractor
     fe = FeatureExtractor()
     fe.buildVectorizer(train_data, self.config['featureKwargs'])
     # Create the feature directory if it doesn't exist
     if not os.path.exists(self.feature_path):
         os.mkdir(self.feature_path)
     # Check if train vectors already exist
     if os.path.exists(os.path.join(self.feature_path, 'train_vectors.npz')):
         # If it does, load them
         train_vectors = load_npz(os.path.join(self.feature_path, 'train_vectors.npz'))
     else:
         # Make the train vectors
         train_vectors = [fe.process(feature, train_data) for feature in self.config['features']]
         if len(train_vectors) > 1:
             train_vectors = numpy.concatenate(train_vectors, axis=1)
         else:
             train_vectors = train_vectors[0]
         # Save the train vectors
         save_npz(os.path.join(self.feature_path, 'train_vectors.npz'), train_vectors)
     # Check if test vectors already exist
     if os.path.exists(os.path.join(self.feature_path, 'test_vectors.npz')):
         # If it does, load them
         test_vectors = load_npz(os.path.join(self.feature_path, 'test_vectors.npz'))
     else:
         # Make the test vectors
         test_vectors = [fe.process(feature, test_data) for feature in self.config['features']]
         if len(test_vectors) > 1:
             test_vectors = numpy.concatenate(test_vectors, axis=1)
         else:
             test_vectors = test_vectors[0]
         # Save the test vectors
         save_npz(os.path.join(self.feature_path, 'test_vectors.npz'), test_vectors)
     return train_vectors, test_vectors
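Note: save_npz and load_npz come from scipy.sparse and only handle sparse matrices, and numpy.concatenate does not combine scipy sparse matrices correctly. If fe.process returns sparse matrices, a sparse-aware sketch of the cache-or-compute step might look like this (FeatureExtractor, the config keys, and the cache path are taken from the example above):

    import os
    from scipy.sparse import hstack, load_npz, save_npz

    def cached_vectors(fe, features, data, path):
        # Reuse previously computed vectors if the cache file exists.
        if os.path.exists(path):
            return load_npz(path)
        parts = [fe.process(f, data) for f in features]
        # hstack is the sparse-aware equivalent of numpy.concatenate(..., axis=1).
        vectors = hstack(parts).tocsr() if len(parts) > 1 else parts[0]
        save_npz(path, vectors)
        return vectors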
Example #2
 def __init__(self, vehicle):
     print("Trip Starts")
     self.vehicle = vehicle
     self.timer = 0
     self.data = []
     self.featureExtractor = FeatureExtractor()
     self.tripNo = 1
Example #3
    def __init__(self, name, epsilon=0.05, gamma=0.8, alpha=0.2, numTraining=0):
        self.name = name
        self.cards = [] #(cardValue, cardElement)
        self.accumulatedCards = {"Fire": 0, "Water": 0, "Ice": 0}
        self.playedCard = None

        self.args = {}
        self.args['epsilon'] = epsilon
        self.args['gamma'] = gamma
        self.args['alpha'] = alpha
        self.args['numTraining'] = numTraining
        self.weights = Counter()

        # self.weights["enemy-distance-to-closest-win"] = 1.3999995454998298e-06 
        # self.weights["agent-distance-to-closest-win"] = 1.299999463999758e-06

        self.weights["enemy-distance-to-closest-win"] = -4.120535635213156 
        self.weights["agent-distance-to-closest-win"] = 9.586679017815417 
        self.weights["agent-went-closer-to-win"] = -0.9656494587969497 
        self.weights["agent-can-block-enemy-advancement"] = 15.147299275663869 


        self.featExtractor = FeatureExtractor()
        self.lastState = None
        self.lastAction = None
        self.lastScore = 0
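Note: a Counter of named weights like this is the usual representation for approximate Q-learning, where the value of a state-action pair is the dot product of the weight vector and the feature vector. A minimal sketch of that computation, assuming getFeatures returns a feature-name-to-value mapping (as the test in Example #5 suggests):

    def getQValue(self, state, action):
        # Q(s, a) = sum_i w_i * f_i(s, a)
        features = self.featExtractor.getFeatures(state, action, self.name)
        return sum(self.weights[name] * value for name, value in features.items())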
Example #4
def create_generators():
    # create training generator
    preloader = MatrixPreLoader(dataset_directory=training_filepath,
                                patients_to_use="ALL",
                                activity_types=activities_to_load,
                                print_loading_progress=False)
    matrix_data_generator = MatrixDataGenerator(preloader,
                                                matrix_dimensions=(224, 224),
                                                rgb=True,
                                                twoD=False,
                                                add_gaussian_noise=0,
                                                zero_sensors=0,
                                                batch_size=32,
                                                grab_data_from=(0, 1),
                                                overflow="BEFORE",
                                                print_loading_progress=False)
    training_generator = FeatureExtractor(matrix_data_generator,
                                          patient_fall_filepath,
                                          weigths_filepath,
                                          test=True)

    # create testing generator
    preloader = MatrixPreLoader(dataset_directory=testing_filepath,
                                patients_to_use="ALL",
                                activity_types=activities_to_load,
                                print_loading_progress=False)
    matrix_data_generator = MatrixDataGenerator(preloader,
                                                matrix_dimensions=(224, 224),
                                                rgb=True,
                                                twoD=False,
                                                add_gaussian_noise=0,
                                                zero_sensors=3,
                                                batch_size=50,
                                                grab_data_from=(0, 1),
                                                overflow="BEFORE",
                                                print_loading_progress=False)
    testing_generator = FeatureExtractor(matrix_data_generator,
                                         patient_fall_filepath,
                                         weigths_filepath,
                                         test=False)

    return training_generator, testing_generator
Example #5
    def testExtraction(self):
        featureExt = FeatureExtractor()
        agent = Player("aql agent")
        enemy = Player("greedy agent")
        gameState = GameState(agent, enemy)
        enemy.accumulatedCards["Water"] += 1
        enemy.accumulatedCards["Fire"] += 1
        features = featureExt.getFeatures(gameState, "action", agent.name)
        self.assertEqual(features["enemy-distance-to-closest-win"], 1)
        self.assertEqual(features["agent-distance-to-closest-win"], 4)

        agent.cards.append((1, "Water"))
        enemy.accumulatedCards["Fire"] -= 1
        enemy.accumulatedCards["Water"] += 1

        features = featureExt.getFeatures(gameState, "action", agent.name)
        self.assertEqual(features["agent-distance-to-closest-win"], 3)
        self.assertEqual(features["enemy-distance-to-closest-win"], 1)
Example #6
 def extractFeatures(self, train_data, test_data):
     # Extract features and return them as concatenated arrays
     fe = FeatureExtractor(self.config['features'],
                           self.config['featurePath'],
                           self.config['featureKwargs'])
     fe.buildVectorizer(train_data)
     # Check for already completed work
     if path.exists(self.config['featurePath'] + "train_data.pickle"):
         with open(self.config['featurePath'] + "train_data.pickle",
                   "rb") as file:
             train_vectors = pickle.load(file)
     else:
         train_vectors = fe.process(train_data)
         with open(self.config['featurePath'] + "train_data.pickle",
                   "wb+") as file:
             pickle.dump(train_vectors, file)
     if len(train_vectors) > 1:
         train_vectors = numpy.concatenate(train_vectors, axis=1)
     else:
         train_vectors = train_vectors[0]
     # Check for already completed work
     if path.exists(self.config['featurePath'] + "test_data.pickle"):
         with open(self.config['featurePath'] + "test_data.pickle",
                   "rb") as file:
             test_vectors = pickle.load(file)
     else:
         test_vectors = fe.process(test_data)
         with open(self.config['featurePath'] + "test_data.pickle",
                   "wb+") as file:
             pickle.dump(test_vectors, file)
     if len(test_vectors) > 1:
         test_vectors = numpy.concatenate(test_vectors, axis=1)
     else:
         test_vectors = test_vectors[0]
     return train_vectors.toarray(), test_vectors.toarray()
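Note: this example builds cache paths by string concatenation (self.config['featurePath'] + "train_data.pickle"), which silently breaks when featurePath lacks a trailing separator. A small sketch of the same pickle cache using os.path.join, which works either way (cached_pickle is a hypothetical helper, not part of the example):

    import os
    import pickle

    def cached_pickle(cache_dir, name, compute):
        path = os.path.join(cache_dir, name)
        if os.path.exists(path):
            with open(path, "rb") as fh:
                return pickle.load(fh)  # reuse the cached result
        result = compute()
        with open(path, "wb") as fh:
            pickle.dump(result, fh)  # cache for later runs
        return result

With it, the train branch collapses to train_vectors = cached_pickle(self.config['featurePath'], "train_data.pickle", lambda: fe.process(train_data)).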
Example #7
    else:
        featureSettings['polarity'] = set(
            i for i in args.polarityFeatures.split(','))

#elif args.featuresFile:
#    features = set(args.featuresFile.read().splitlines())
#else:
#    features = {}
#print(features)

if args.markersFile:
    featureSettings['markersFile'] = args.markersFile
    #hiddenFileName += '_markers'

print(featureSettings)
fe = FeatureExtractor(**featureSettings)

os.mkdir(args.session)
os.chdir(args.session)

if not args.dataDir:
    dataDir = '/local/nlp/chidey/social_meaning/aclImdb/'
else:
    dataDir = args.dataDir
main(dataDir, fe, '{}_{}'.format(args.session, 0), args.numtrain)

learnCommandTemplate = '/local/nlp/chidey/social_meaning/yessenalina/sle_movieReviews/bin/svm_sle_learn -v 3 -c {0} -l {1} {2}_{3}_{4} hidden_vars_{3}_{4}_{2} model_{3}_{4}'.format(
    args.c, args.l, '{0}', args.session, '{1}')
# {2} = train/validate/test file
# {3} = session
# {4} = iteration
Example #8
# The pre-computed features can also be downloaded from http://iamai.nl/downloads/features.npy
if not isfile(featurePath):
    print("indexing images...")
    Steles = [
        join(stelePath, f) for f in listdir(stelePath)
        if isdir(join(stelePath, f))
    ]
    for stele in Steles:
        imagePaths = [
            join(stele, f) for f in listdir(stele) if isfile(join(stele, f))
        ]
        for path in imagePaths:
            image_paths.append(path)
            labels.append(path[(path.rfind("_") + 1):path.rfind(".")])

    featureExtractor = FeatureExtractor()
    features = []
    print("computing features...")
    for idx, (batch_images,
              _) in enumerate(batchGenerator(image_paths, labels, batch_size)):
        print("{}/{}".format((idx + 1) * batch_size, len(labels)))
        features_ = featureExtractor.get_features(batch_images)
        features.append(features_)
    features = np.vstack(features)

    labels = np.asarray(labels)
    print("saving features...")
    np.save(featurePath, features)
    np.save(labelsPath, labels)
else:
    print("loading precomputed features and labels from {} and {}".format(
        featurePath, labelsPath))
    features = np.load(featurePath)
    labels = np.load(labelsPath)
Example #9
from NeuralNetwork import NeuralNetwork
from featureExtractor import FeatureExtractor
import numpy as np
from DataLoader import DataLoader
import configure

#best results:
#(0.05, 60, 30, 30)
#0.5428571428571428
#(0.1, 50, 60, 30)
#0.5714285714285714
#(0.1, 300, 100, 30)
#0.6
fe = FeatureExtractor("generatedData/eigenfaces.csv",
                      "generatedData/average_face.csv")
best = 0.0
for lr in range(4, 7, 1):
    for ne in range(380, 421, 5):
        for nhn in range(40, 91, 10):
            for nev in range(50, 91, 10):
                configure.setUpConfig(lr / 100, ne, nhn, nev)

                #prepare data for training:
                dl = DataLoader(configure.config_global.modeTrain)
                dl.load_all_images()
                datasetTrain = fe.generate_dataset(dl.images)

                #train NN:
                nn = NeuralNetwork(configure.config_global.noOfEigenValues,
                                   configure.config_global.noOfHidNeur)
                nn.trainNetwork(datasetTrain)
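Note: lr / 100 relies on Python 3 true division; under Python 2 the same expression is integer division and evaluates to 0 for every lr in range(4, 7), silently zeroing the learning rate. Multiplying by a float is unambiguous in both versions:

    for lr in range(4, 7):
        learning_rate = lr * 0.01  # 0.04, 0.05, 0.06 under Python 2 and 3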
Example #10
def main():
    """" Preprocesses, extracts, learns, tests"""

    # process flags
    do_retrain, do_rebuildValidation, do_test = False, False, False

    for arg in sys.argv[1:]:
        if ("--retrain" in arg):
            if ("yes" in arg):
                do_retrain = True
        if ("--rebuildValidation" in arg):
            if ("yes" in arg):
                do_rebuildValidation = True
        if ("--test" in arg):
            if ("yes" in arg):
                do_test = True

    # preprocessing
    do = DataOrganizer()

    # __________________________________ TRAINING ________________________ #

    # use BoG to convert to frequency vector

    fe = FeatureExtractor(FeatureExtractor.ModelType.BagOfClusters)

    clf = None
    clf_file = ""

    # get the latest trained model
    filenames = os.listdir("models/")
    if len(filenames) > 0:
        clf_file = "models/" + filenames[-1]
    else:
        clf_file = None

    # get sets of tweets as training data
    # trainData0, trainData1, validation0, validation1 \
    #     = do.organizeTrainWithValidation("data/trainValidate/", do_rebuildValidation)

    trainData0, trainData1 = do.organizeTrain("data/train/")

    if do_retrain or not clf_file:
        # split training set into validation and training set
        X0, X1 = fe.extractTrainFeatureVectors((trainData0, trainData1))
        clf = learn(X0, X1)

        millis = int(round(time.time() * 1000))
        clf_file = "trainedModel" + str(millis)
        print "Saving model to file..."

        joblib.dump(clf, "models/" + clf_file, compress=1)
    else:
        print "Using trained model and BoG..."
        fe.bog = BagOfWords()
        fe.bog.getLatestBoG()
        clf = joblib.load(clf_file)

    # we're either validating or testing based on the passed flag

    # ____________________________________VALIDATION__________________________#
    if not do_test:
        # feed in the validation sets as one set
        validationData = do.organizeTest("data/validation/")
        validationFeatures, validationLabels = fe.extractTestFeatureVectors(
            validationData)
        test("Validation", clf, validationFeatures, validationLabels)
    else:
        # ____________________________________TESTING _______________________ #

        # extract test features and test
        print "Using testing"
        testData, testLabels = do.organizeTest("data/test/")
        testFeatures = fe.extractTestFeatureVectors(testData)
        test("Testing, Global Protests With Background Subtraction", clf,
             testFeatures, testLabels)
Example #11
def registrate(drone_img_ori, pcl_img_ori, mask_image, args):
    common_args = {
        'pcl_mask': args.pcl_mask,
        'drone_mask': args.drone_mask,
        'save_masked_pcl': args.save_masked_pcl,
        'save_masked_drone': args.save_masked_drone,
        'save_keypoints': args.save_keypoints,
        'save_csv': args.save_csv,
        'save_matching': args.save_matching
    }

    result = {}

    # Preprocess Images
    img_preprocessor = Preprocessor(drone_img_ori, pcl_img_ori, mask_image)
    img_preprocessor.preprocessing()
    imgs = img_preprocessor.get_processed_imgs()

    processed_drone_img = imgs['processed_drone_img']
    processed_pcl_img = imgs['processed_pcl_img']
    processed_drone_mask = imgs['processed_drone_mask']
    processed_pcl_mask = imgs['processed_pcl_mask']
    masked_drone_img = imgs['masked_drone_img']
    masked_pcl_img = imgs['masked_pcl_img']

    if common_args['save_masked_pcl'] is True:
        result.update({'masked_pcl': masked_pcl_img})

    if common_args['save_masked_drone'] is True:
        result.update({'masked_drone': masked_drone_img})

    # Extract Features
    drone_feature_extractor = FeatureExtractor(processed_drone_img, "SIFT",
                                               args)
    pcl_feature_extractor = FeatureExtractor(processed_pcl_img, "SIFT", args)

    if common_args['pcl_mask'] is True:
        print("pcl_mask: True")
        pcl_feature_extractor.compute(mask=processed_pcl_mask)
    else:
        print("No pcl_mask")
        pcl_feature_extractor.compute(mask=None)

    if common_args['drone_mask'] is True:
        print("drone_mask: True")
        drone_feature_extractor.compute(mask=processed_drone_mask)
    else:
        print('No drone_mask')
        drone_feature_extractor.compute(mask=None)

    drone_features, drone_descs = drone_feature_extractor.get_features_and_descriptors(
    )

    pcl_features, pcl_descs = pcl_feature_extractor.get_features_and_descriptors(
    )

    if common_args['save_keypoints'] is True:
        keypoints_lidar = cv2.drawKeypoints(
            pcl_img_ori,
            pcl_features,
            outImage=np.array([]),
            color=(0, 0, 255),
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
        keypoints_drone = cv2.drawKeypoints(
            drone_img_ori,
            drone_features,
            outImage=np.array([]),
            color=(0, 0, 255),
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
        result.update({'keypoints_lidar_image': keypoints_lidar})
        result.update({'keypoints_drone_image': keypoints_drone})

    # Find Matching
    matcher = Matcher(drone_features, drone_descs, pcl_features, pcl_descs,
                      args)
    matcher.extract_match()

    raw_matchs = matcher.get_matchs()
    good_matchs = matcher.get_good_matchs()

    # Find Homography
    homography, status = find_homography(drone_features, pcl_features,
                                         good_matchs, args)

    if common_args['save_csv'] is True:
        result.update({'drone_total_keypoints': len(drone_features)})
        result.update({'pcl_total_keypoints': len(pcl_features)})
        result.update({'num_inliers': (status.ravel().astype(int) == 1).sum()})
        result.update({'num_raw_matches': len(raw_matchs)})
        result.update({'num_good_matches': len(good_matchs)})
        result.update({'homography': homography})

    if common_args['save_matching'] is True:
        matching1 = matcher.draw_matches(processed_drone_img,
                                         processed_pcl_img, status, homography)
        matching2 = matcher.draw_matches(processed_drone_img,
                                         processed_pcl_img, None, homography)
        matching3 = matcher.draw_matches(processed_drone_img,
                                         processed_pcl_img)
        result.update({'matching1': matching1})
        result.update({'matching2': matching2})
        result.update({'matching3': matching3})

    registered_image = cv2.warpPerspective(
        drone_img_ori, homography,
        (processed_pcl_img.shape[1], processed_pcl_img.shape[0]))

    ret_image = cv2.add(registered_image,
                        cv2.cvtColor(processed_pcl_img, cv2.COLOR_GRAY2BGR))

    result.update({'image': ret_image})
    return result
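Note: find_homography is this project's own helper. A hypothetical sketch of what such a helper typically wraps, using OpenCV's RANSAC estimator (cv2.findHomography returns the 3x3 matrix plus the inlier mask consumed as status above; the real helper may differ):

    import cv2
    import numpy as np

    def find_homography_sketch(src_keypoints, dst_keypoints, matches, ransac_thresh=5.0):
        # Gather the matched keypoint coordinates from both images.
        src_pts = np.float32([src_keypoints[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
        dst_pts = np.float32([dst_keypoints[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)
        # Estimate the homography with RANSAC; the mask marks inlier matches.
        return cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, ransac_thresh)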
Example #12
        avg = float(sum(relavant_vals)) / len(points)
        if avg > 0: res[dimension] = avg
    return res


examples = []
laterexamples = []
vectorX = []
with open("Period 5 Rand.csv", 'rb') as file_reader:
    reader = csv.reader(file_reader, delimiter=",")
    counter = 0
    for line in reader:
        if counter == 5000: break  # stop early to keep runtime manageable
        counter += 1
        vectorX.append(line[1])
        laterObj = (line[0], FeatureExtractor(line[1]).featureVector())
        examples.append(laterObj[1])
        laterexamples.append(laterObj)

extractor = sklearn.feature_extraction.text.CountVectorizer(
    input='content', ngram_range=(2, 3), max_df=.7, stop_words='english')
X = extractor.fit_transform(vectorX)

distArr = []
inerArr = []
for k in range(1, 20):
    clusterer = sklearn.cluster.KMeans(n_clusters=k)
    res = clusterer.fit(X)
    print "Inertia with %d clusters is %d" % (k, clusterer.inertia_)
    inerArr.append(clusterer.inertia_)
    #build the dict-style clusters so I can use my same distortion function consistently across them
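Note: the loop collects the K-means inertia for k = 1..19, the ingredient of an elbow plot for choosing the cluster count. A minimal sketch of that plot, assuming matplotlib is available (inerArr comes from the loop above):

    import matplotlib.pyplot as plt

    plt.plot(range(1, 20), inerArr, marker="o")
    plt.xlabel("number of clusters k")
    plt.ylabel("inertia")
    plt.title("K-means elbow curve")
    plt.show()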
Example #13
    def __init__(self,
                 corpdb=fwc.DEF_CORPDB,
                 corptable=fwc.DEF_CORPTABLE,
                 correl_field=fwc.DEF_CORREL_FIELD,
                 mysql_host="localhost",
                 message_field=fwc.DEF_MESSAGE_FIELD,
                 messageid_field=fwc.DEF_MESSAGEID_FIELD,
                 encoding=fwc.DEF_ENCODING,
                 use_unicode=fwc.DEF_UNICODE_SWITCH,
                 lexicondb=fwc.DEF_LEXICON_DB,
                 featureTable=fwc.DEF_FEAT_TABLE,
                 featNames=fwc.DEF_FEAT_NAMES,
                 date_field=fwc.DEF_DATE_FIELD,
                 outcome_table=fwc.DEF_OUTCOME_TABLE,
                 outcome_value_fields=[fwc.DEF_OUTCOME_FIELD],
                 outcome_controls=fwc.DEF_OUTCOME_CONTROLS,
                 outcome_interaction=fwc.DEF_OUTCOME_CONTROLS,
                 group_freq_thresh=None,
                 featureMappingTable='',
                 featureMappingLex='',
                 output_name='',
                 wordTable=None,
                 model=fwc.DEF_MODEL,
                 feature_selection='',
                 feature_selection_string='',
                 init=None):

        if feature_selection_string or feature_selection:
            RegressionPredictor.featureSelectionString = feature_selection if feature_selection else feature_selection_string

        if init:
            self.fw = FeatureWorker(
                corpdb, corptable, correl_field, mysql_host, message_field,
                messageid_field, encoding, use_unicode, lexicondb, date_field,
                wordTable) if 'fw' in init else None
            self.fg = FeatureGetter(
                corpdb, corptable, correl_field, mysql_host, message_field,
                messageid_field, encoding, use_unicode, lexicondb,
                featureTable, featNames, wordTable) if 'fg' in init else None
            self.fe = FeatureExtractor(
                corpdb,
                corptable,
                correl_field,
                mysql_host,
                message_field,
                messageid_field,
                encoding,
                use_unicode,
                lexicondb,
                wordTable=wordTable) if 'fe' in init else None
            self.fr = FeatureRefiner(
                corpdb, corptable, correl_field, mysql_host, message_field,
                messageid_field, encoding, use_unicode, lexicondb,
                featureTable, featNames, wordTable) if 'fr' in init else None
            self.og = OutcomeGetter(
                corpdb, corptable, correl_field, mysql_host, message_field,
                messageid_field, encoding, use_unicode, lexicondb,
                outcome_table, outcome_value_fields, outcome_controls,
                outcome_interaction, group_freq_thresh, featureMappingTable,
                featureMappingLex, wordTable) if 'og' in init else None
            self.oa = OutcomeAnalyzer(
                corpdb, corptable, correl_field, mysql_host, message_field,
                messageid_field, encoding, use_unicode, lexicondb,
                outcome_table, outcome_value_fields, outcome_controls,
                outcome_interaction, group_freq_thresh, featureMappingTable,
                featureMappingLex, output_name,
                wordTable) if 'oa' in init else None
            self.rp = RegressionPredictor(self.og, self.fg,
                                          model) if 'rp' in init else None
            self.cp = ClassifyPredictor(self.og, self.fg,
                                        model) if 'cp' in init else None

        else:
            self.fw = FeatureWorker(corpdb, corptable, correl_field,
                                    mysql_host, message_field, messageid_field,
                                    encoding, use_unicode, lexicondb,
                                    date_field, wordTable)
            self.fg = FeatureGetter(corpdb, corptable, correl_field,
                                    mysql_host, message_field, messageid_field,
                                    encoding, use_unicode, lexicondb,
                                    featureTable, featNames, wordTable)
            self.fe = FeatureExtractor(corpdb,
                                       corptable,
                                       correl_field,
                                       mysql_host,
                                       message_field,
                                       messageid_field,
                                       encoding,
                                       use_unicode,
                                       lexicondb,
                                       wordTable=wordTable)
            self.fr = FeatureRefiner(corpdb, corptable, correl_field,
                                     mysql_host, message_field,
                                     messageid_field, encoding, use_unicode,
                                     lexicondb, featureTable, featNames,
                                     wordTable)
            self.og = OutcomeGetter(corpdb, corptable, correl_field,
                                    mysql_host, message_field, messageid_field,
                                    encoding, use_unicode, lexicondb,
                                    outcome_table, outcome_value_fields,
                                    outcome_controls, outcome_interaction,
                                    group_freq_thresh, featureMappingTable,
                                    featureMappingLex, wordTable)
            self.oa = OutcomeAnalyzer(
                corpdb, corptable, correl_field, mysql_host, message_field,
                messageid_field, encoding, use_unicode, lexicondb,
                outcome_table, outcome_value_fields, outcome_controls,
                outcome_interaction, group_freq_thresh, featureMappingTable,
                featureMappingLex, output_name, wordTable)
            self.rp = RegressionPredictor(self.og, self.fg, model)
            self.cp = ClassifyPredictor(self.og, self.fg, model)

        self.allFW = {
            "FeatureWorker": self.fw,
            "FeatureGetter": self.fg,
            "FeatureExtractor": self.fe,
            "FeatureRefiner": self.fr,
            "OutcomeGetter": self.og,
            "OutcomeAnalyzer": self.oa,
            "RegressionPredictor": self.rp,
            "ClassifyPredictor": self.cp,
        }
Example #14
    def createDataset(self, pathRaw, pathLabels, pathVectors,
                      pathCorrespondence):

        print("Loading spacy")
        nlp = spacy.load('en_core_web_md')
        print("loaded")
        labelIdx = 0

        #minWords = 9999
        #maxWords = 0
        #avgWords = 0

        listFiles = os.listdir(pathRaw)
        nTexts = len(listFiles)
        fdOutLabels = open(pathLabels, "w")
        fdOutCorrespondence = open(pathCorrespondence, "w")

        for i, fname in enumerate(listFiles):
            label = fname.split("_")[1]
            if label not in self.labelDict:
                self.labelDict[label] = labelIdx
                labelIdx += 1

            numeric_label = self.labelDict[label]
            fdOutLabels.write(str(numeric_label) + "\n")

            fd = open(pathRaw + fname, "r")
            raw = fd.read()
            iF = FeatureExtractor(raw, nlp)
            #FIRST SENTENCE ONLY NOW
            instanceVectors = []
            for wordDict in iF.features[0]:
                instanceVectors.append(wordDict["vector"])
                #only include maxWords vectors
                if len(instanceVectors) == self.maxLen:
                    break

            # pad with zero vectors up to maxLen
            while len(instanceVectors) < self.maxLen:
                instanceVectors.append(list(np.zeros(268)))
            '''
			nWords = len(instanceVectors)
			if nWords > maxWords:
				maxWords = nWords
			if nWords < minWords:
				minWords = nWords

			avgWords += nWords
			
			'''

            self.dataset.append(instanceVectors)
            fd.close()
            print(i, "of", nTexts)

        fdOutLabels.close()
        self.dataset = np.array(self.dataset)
        #save feature vectors per text
        np.save(pathVectors, self.dataset)

        fdOutCorrespondence.write(str(self.labelDict))
        fdOutCorrespondence.close()
        '''
Example #15
    def model_training(self):

        # Check if the feature file is present; if so, there is no need to recompute the features.
        # The pre-computed features can also be downloaded from http://iamai.nl/downloads/features.npy
        if not isfile(self.featurePath):
            print("indexing images...")
            Steles = [
                join(self.stelePath, f) for f in listdir(self.stelePath)
                if isdir(join(self.stelePath, f))
            ]
            for stele in Steles:
                imagePaths = [
                    join(stele, f) for f in listdir(stele)
                    if isfile(join(stele, f))
                ]
                for path in imagePaths:
                    self.image_paths.append(path)
                    self.labels.append(path[(path.rfind("_") +
                                             1):path.rfind(".")])

            featureExtractor = FeatureExtractor()
            features = []
            print("computing features...")
            for idx, (batch_images, _) in enumerate(
                    batchGenerator(self.image_paths, self.labels,
                                   self.batch_size)):
                print("{}/{}".format((idx + 1) * self.batch_size,
                                     len(self.labels)))
                features_ = featureExtractor.get_features(batch_images)
                features.append(features_)
            features = np.vstack(features)

            labels = np.asarray(self.labels)
            print("saving features...")
            np.save(self.featurePath, features)
            np.save(self.labelsPath, labels)
        else:
            print("loading precomputed features and labels from {} and {}".
                  format(self.featurePath, self.labelsPath))
            features = np.load(self.featurePath)
            labels = np.load(self.labelsPath)

        # on to the SVM training phase
        tobeDeleted = np.nonzero(
            labels == "UNKNOWN")  # Remove the Unknown class from the database
        features = np.delete(features, tobeDeleted, 0)
        labels = np.delete(labels, tobeDeleted, 0)
        numImages = len(labels)
        trainSet, testSet, trainLabels, testLabels = train_test_split(
            features, labels, test_size=0.20, random_state=42)

        # Training the SVM. Feel free to use a linear SVM (or another classifier) for
        # faster training; however, that will not give the confidence scores that can
        # be used to rank hieroglyphs.
        print("training SVM...")
        # Optional: either train one classifier quickly, or search through the
        # parameter space with multiple classifiers to squeeze out that extra 2%.
        if 0:
            clf = linear_model.LogisticRegression(C=10000)
        else:
            svr = linear_model.LogisticRegression()
            parameters = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]}
            clf = GridSearchCV(svr, parameters, n_jobs=8)
        clf.fit(trainSet, trainLabels)

        print(clf)
        print("finished training! saving...")
        joblib.dump(clf, self.svmPath, compress=1)

        prediction = clf.predict(testSet)
        accuracy = np.sum(testLabels == prediction) / float(len(prediction))

        # for idx, pred in enumerate(prediction):
        #     print("%-5s --> %s" % (testLabels[idx], pred))
        print("accuracy = {}%".format(accuracy * 100))
Example #16
	def __init__(self, n):
		self.data = DataManager('../data/train.csv','../data/test.csv', n)
		self.fe = FeatureExtractor(self.data)
		self.eval = Evaluate()