Example #1
def predict():

    # initialize the data dictionary that will be returned from the
    # view
    data = {"success": False}

    # ensure an image was properly uploaded to our endpoint
    if flask.request.method == "POST":
        if flask.request.files.get("file"):
            # read the image in PIL format
            image = flask.request.files["file"].read()
            print(f"The image is: {flask.request.files['file']}")
            image = Image.open(io.BytesIO(image))
            # preprocess the image and prepare it for classification
            image_ = load(image)

            # classify the input image and then initialize the list
            # of predictions to return to the client
            # model = keras.models.load_model('model.model')
            preds = xray_model.predict(image_)
            data["success"] = True

            preds_list = preds.tolist()[0]
            preds_list[0] = round(preds_list[0] * 100, 2)
            preds_list[1] = round(preds_list[1] * 100, 2)

            data["predictions"] = preds_list

    # return the data dictionary as a JSON response
    if (flask.request.content_type or "").startswith("application/json"):
        return flask.jsonify(data)
    return render_template("result.html", data=data)
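For reference, a minimal client for the endpoint above might look like the sketch below; the URL, port, and file name are assumptions, not part of the source.

# Hypothetical client; URL, port and file name are assumptions.
import requests

with open('chest_xray.png', 'rb') as f:
    resp = requests.post('http://localhost:5000/predict', files={'file': f})
print(resp.status_code)
print(resp.text)  # HTML from result.html unless the request was sent as JSON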
Example #2
def main():
    metadata = Metadata()
    data, labels = preprocessing.load(metadata)
    data = preprocessing.encode(data, metadata.COLUMNS)

    # divide data into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(
        data, labels, test_size=0.2)  #, random_state=33)

    # run the classifiers
    clf_base = model.baseline_classifier(x_train, y_train)
    clf_nb = model.naive_bayes_classifier(x_train, y_train, metadata.COLUMNS)
    clf_knn = model.knn_classifier(x_train, y_train, metadata.COLUMNS)
    clf_svm = model.svm_classifier(x_train, y_train, metadata.COLUMNS)

    # pick the classifier with the best cross-validated score
    candidates = [('base', clf_base), ('knn', clf_knn),
                  ('svm', clf_svm), ('nb', clf_nb)]
    name, clf = max(candidates, key=lambda c: c[1].best_score_)

    # predict test set
    y_pred = clf.predict(x_test)
    print('Best classifier: %s' % name)
    print('\taccuracy: %0.3f\n' % accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))
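The selection step reads best_score_, which suggests the model.* factories return fitted GridSearchCV objects. A plausible sketch of one of them, purely as an assumption since model.py is not shown:

# Hypothetical sketch of model.knn_classifier; only the best_score_ attribute
# used above is confirmed by the source.
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

def knn_classifier(x_train, y_train, columns):
    grid = GridSearchCV(KNeighborsClassifier(),
                        param_grid={'n_neighbors': [3, 5, 7, 9]},
                        cv=5)
    grid.fit(x_train, y_train)
    return grid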
Example #3
def cv(k, c, extra=()):

    p = 0
    r = 0
    f1 = 0

    p_all = np.zeros(3)
    r_all = np.zeros(3)
    f1_all = np.zeros(3)

    data, data_raw = pp.load(True)
    kf = KFold(n_splits=k)
    for train_index, test_index in kf.split(data):
        training_data = data.iloc[train_index, :]
        raw_training = data_raw.iloc[train_index, :]
        test_data = data.iloc[test_index, :]
        raw_test = data_raw.iloc[test_index, :]
        #print('train_size: ' + str(training_data.shape[0]))
        #print('test_size: ' + str(test_data.shape[0]))

        predict, p_, r_, f1_, p_all_, r_all_, f1_all_ = run(
            training_data, test_data, raw_training, raw_test, c, extra)

        p += p_
        r += r_
        f1 += f1_

        p_all += p_all_
        r_all += r_all_
        f1_all += f1_all_

    # average the per-fold sums
    p /= k
    r /= k
    f1 /= k
    p_all /= k
    r_all /= k
    f1_all /= k
    score_set.append(f1)
    return p, r, f1, p_all, r_all, f1_all
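A minimal call might look like this; the meaning of c is defined by run(), which is not shown, so the value here is only a placeholder.

# Hypothetical usage; 'svm' is a placeholder for whatever run() expects as c.
p, r, f1, p_all, r_all, f1_all = cv(10, 'svm')
print('mean F1 over 10 folds: %0.3f' % f1)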
Example #4
from preprocessing import load, add_title_bad
from utilities import split, tfidf_transform

if __name__ == '__main__':
    steam_reviews, nlp, docs = load(r"steam_reviews.csv")
    add_title_bad(docs, steam_reviews)

    # Split on user_suggestion
    X_train, X_test, y_train, y_test = split(docs,
                                             steam_reviews.user_suggestion)

    # TF-IDF and LDA
    X_train_tfidf, X_test_tfidf, tfidf = tfidf_transform(X_train, X_test)
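utilities.tfidf_transform is not shown; a sketch consistent with its three return values would fit on the training split and reuse the fitted vocabulary on the test split, along these lines (an assumption):

# Hypothetical sketch of utilities.tfidf_transform.
from sklearn.feature_extraction.text import TfidfVectorizer

def tfidf_transform(X_train, X_test):
    tfidf = TfidfVectorizer()
    X_train_tfidf = tfidf.fit_transform(X_train)  # learn vocabulary on train only
    X_test_tfidf = tfidf.transform(X_test)        # reuse it on test
    return X_train_tfidf, X_test_tfidf, tfidf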

Example #5
import sys
from training import *
from preprocessing import load

if __name__ == "__main__":
    if sys.argv[1] == '2':
        validation_reviews = load('validationreviews.pkl')
        validation_list = load('validationlist.pkl')
        test_reviews = load('testreviews.pkl')
        test_list = load('testlist.pkl')
        random_list_pos = load('randompos.pkl')
        random_list_neg = load('randomneg.pkl')
        review_list = load('reviewlist.pkl')
        indexlist = load('indexlist.pkl')

        Tree = tree()
        Tree.treeconstruct(random_list_pos, random_list_neg, indexlist,
                           review_list)
        print("total nodes in decision tree initially : " +
              str(count_nodes(Tree.root)))
        print("total leaves in decision tree initially : " +
              str(count_leaves(Tree.root)))
        h = height(Tree.root)
        print("height of decision tree initially : " + str(h))
        print("accuracy on training set : " + str(
            accuracy(Tree, (random_list_pos + random_list_neg), review_list)))
        print("accuracy on test set : " +
              str(accuracy(Tree, test_list, test_reviews)))
        indexcount = {}
        count_splitting_times(Tree.root, indexcount)
        count = 0
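The tree helpers imported from training are not shown; recursive versions along these lines would match the calls above, assuming a binary left/right node layout (the real structure may differ):

# Hypothetical recursive helpers; the node layout is an assumption.
def count_nodes(node):
    if node is None:
        return 0
    return 1 + count_nodes(node.left) + count_nodes(node.right)

def count_leaves(node):
    if node is None:
        return 0
    if node.left is None and node.right is None:
        return 1
    return count_leaves(node.left) + count_leaves(node.right)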
Example #6
    def main(self):
        t_start = datetime.now()
        logger.info(' {} / {} '.format(self.name,
                                       self.random_seed).center(62, '='))
        logger.info('Hyperparameters:\n{}'.format(pprint.pformat(self.params)))
        if os.path.isfile(os.path.join(self.output_dir, 'test.csv')):
            logger.info('Output already exists - skipping')
            return

        # Initialize the random number generator
        self.random_state = RandomState(self.random_seed)
        np.random.seed(
            int.from_bytes(self.random_state.bytes(4),
                           byteorder=sys.byteorder))

        preprocessed_data = preprocessing.load(self.params)
        vectorizer = self.build_vectorizer(preprocessed_data)

        train_df = common.load_data('train')
        train_df['comment_text'] = train_df['id'].map(preprocessed_data)
        test_df = common.load_data('test')
        test_df['comment_text'] = test_df['id'].map(preprocessed_data)

        folds = common.stratified_kfold(train_df, random_seed=self.random_seed)
        for fold_num, train_ids, val_ids in folds:
            logger.info(f'Fold #{fold_num}')

            fold_train_df = train_df[train_df['id'].isin(train_ids)]
            fold_val_df = train_df[train_df['id'].isin(val_ids)]
            models = self.train(fold_num, vectorizer, fold_train_df,
                                fold_val_df)

            logger.info('Generating the out-of-fold predictions')
            path = os.path.join(self.output_dir,
                                f'fold{fold_num}_validation.csv')
            self.predict(models, vectorizer, fold_val_df, path)

            logger.info('Generating the test predictions')
            path = os.path.join(self.output_dir, f'fold{fold_num}_test.csv')
            self.predict(models, vectorizer, test_df, path)

        logger.info('Combining the out-of-fold predictions')
        df_parts = []
        for fold_num in range(1, 11):
            path = os.path.join(self.output_dir,
                                f'fold{fold_num}_validation.csv')
            df_part = pd.read_csv(path, usecols=['id'] + common.LABELS)
            df_parts.append(df_part)
        train_pred = pd.concat(df_parts)
        path = os.path.join(self.output_dir, 'train.csv')
        train_pred.to_csv(path, index=False)

        logger.info('Averaging the test predictions')
        df_parts = []
        for fold_num in range(1, 11):
            path = os.path.join(self.output_dir, f'fold{fold_num}_test.csv')
            df_part = pd.read_csv(path, usecols=['id'] + common.LABELS)
            df_parts.append(df_part)
        test_pred = pd.concat(df_parts).groupby('id', as_index=False).mean()
        path = os.path.join(self.output_dir, 'test.csv')
        test_pred.to_csv(path, index=False)

        logger.info('Total elapsed time - {}'.format(datetime.now() - t_start))
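common.stratified_kfold is consumed as a generator of (fold_num, train_ids, val_ids) tuples over ten folds. A sketch of that contract, with a plain shuffled KFold standing in because the stratification key is not shown:

# Hypothetical sketch of common.stratified_kfold; the real version presumably
# stratifies on the labels, which this stand-in does not.
from sklearn.model_selection import KFold

def stratified_kfold(train_df, random_seed, n_splits=10):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_seed)
    for fold_num, (train_idx, val_idx) in enumerate(kf.split(train_df), 1):
        yield (fold_num,
               train_df['id'].iloc[train_idx].values,
               train_df['id'].iloc[val_idx].values)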
Example #7
import numpy as np
import preprocessing as pre
# --- Balanced SVM taking the mean over windows of 10 intervals ---

data_num = 500
num_int = 2560
# Read all the experiment data
X, y = pre.load('dataset/', data_num, num_int)
# Take the mean over windows of 10 intervals for each component
X = pre.med_intervalo(X, 10)
# Balance the data
X, y = pre.proc_balanceado(X, y, data_num)
# Split into training and test sets (sampled randomly, shuffling the dependent variables as well)
X_train, X_test, y_train, y_test = pre.split_data(X, y, 0.2, None)
# Standardize the data
X_train, X_test = pre.standardize_data(X_train, X_test)

# Build the SVM
from sklearn.svm import SVC
classifier = SVC(kernel='rbf', probability=True, gamma='auto')
# Train the SVM
classifier.fit(X_train, y_train.ravel())

# Predict on the test set
y_pred = classifier.predict(X_test)
svm_predict = classifier.predict_proba(X_test)

# Produce the confusion matrix for the SVM above
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
print('\n\nConfusion Matrix: \n', cm)
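Since the diagonal of the confusion matrix holds the correct predictions, overall accuracy can be read off directly:

# Overall accuracy from the confusion matrix above.
accuracy = np.trace(cm) / cm.sum()
print('Accuracy: %0.3f' % accuracy)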
Example #8
import pandas
import preprocessing
from defines import Types, Metadata


def print_pivot_tables(data, metadata, numerical=False):
    for column in metadata.COLUMNS:
        if (not numerical and column.TYPE is Types.NUMERICAL) or column.CATEGORIES is None:
            continue
        df_column = pandas.DataFrame(data[column.HEADER])
        count = df_column.apply(pandas.value_counts).T
        count = count.reindex(sorted(count.columns), axis=1)
        print(count, '\n')


def print_statistics(data, metadata):
    for column in metadata.COLUMNS:
        if column.TYPE is not Types.NUMERICAL or column.CATEGORIES is not None:
            continue
        print(pandas.DataFrame(data[column.HEADER]).describe(), '\n')


if __name__ == '__main__':
    metadata = Metadata()
    data, _ = preprocessing.load(metadata)
    print_pivot_tables(data, metadata, numerical=True)
    print_statistics(data, metadata)
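defines.py is not shown; the attributes used in the loops above (HEADER, TYPE, CATEGORIES) suggest a Column shape along these lines, purely as an assumption:

# Hypothetical shape of the Column objects in metadata.COLUMNS.
class Column:
    def __init__(self, header, type_, categories=None):
        self.HEADER = header
        self.TYPE = type_
        self.CATEGORIES = categories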
Example #9
    def generate_data(self, x_data, y_data):
        # Memory allocation
        x_gen = np.zeros([
            self.batch_size, self.image_height, self.image_width,
            self.image_depth
        ])
        y_gen = np.zeros([self.batch_size])
        while True:
            # New epoch
            x_data, y_data = shuffle(x_data, y_data)
            count_gen = 0
            for i in range(len(x_data)):
                # Generate center + augmentations
                center = pre.load(x_data[i][0])
                center_steer = y_data[i][0]
                count_gen = self.generate_feature(center, center_steer, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.flip_feature(center, center_steer)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.shear_feature(center, center_steer,
                                                     random.uniform(-25, -15),
                                                     0.25)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.flip_feature(image, steering)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.shear_feature(center, center_steer,
                                                     random.uniform(15, 25),
                                                     0.25)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.flip_feature(image, steering)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                # Generate left + augmentations
                left = pre.load(x_data[i][1])
                left_steer = y_data[i][1]
                count_gen = self.generate_feature(left, left_steer, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.flip_feature(left, left_steer)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.shear_feature(left, left_steer,
                                                     random.uniform(15, 25),
                                                     0.5)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.flip_feature(image, steering)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                # Generate right + augmentations
                right = pre.load(x_data[i][2])
                right_steer = y_data[i][2]
                count_gen = self.generate_feature(right, right_steer, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.flip_feature(right, right_steer)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.shear_feature(right, right_steer,
                                                     random.uniform(-25, -15),
                                                     0.5)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

                image, steering = self.flip_feature(image, steering)
                count_gen = self.generate_feature(image, steering, x_gen,
                                                  y_gen, count_gen)
                if count_gen == 0:
                    yield x_gen, y_gen

            # Yield remainder
            if count_gen > 0:
                yield x_gen, y_gen
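flip_feature is not shown; mirroring the frame and negating the steering angle is the standard flip augmentation for steering data, so a sketch might be as simple as this (an assumption):

# Hypothetical sketch of flip_feature; np.fliplr mirrors the image horizontally.
def flip_feature(self, image, steering):
    return np.fliplr(image), -steering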
Example #10
#This section describes the main algorithm for training the data
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
import preprocessing
import numpy as np
import model

#Preprocessing call: after this step, features and labels have been extracted from the raw data
features, labels = preprocessing.load()
print("Feature shape:", np.asarray(features).shape)
print("Label shape:", np.asarray(labels).shape)

# Shuffle up the different moves
#features, labels = shuffle(features, labels)

#Hold out 25% of the data for testing, then carve a validation set out of the
#remainder; fall back to cross-validation if there is not enough data
# X_train, X_val, X_test = features[:int(len(features)*0.5), :], features[int(len(features)*0.5):int(len(features)*0.75), :], features[int(len(features)*0.75):, :]
# y_train, y_val, y_test = labels[:int(len(labels)*0.5), :], labels[int(len(labels)*0.5):int(len(labels)*0.75), :], labels[int(len(labels)*0.75):, :]
np.random.seed(1)
X_train, X_test, y_train, y_test = train_test_split(features,
                                                    labels,
                                                    test_size=0.25,
                                                    random_state=1)
X_train, X_val, y_train, y_val = train_test_split(X_train,
                                                  y_train,
                                                  test_size=0.33,
                                                  random_state=1)

#One Hot Encoding labels
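The snippet is cut off at the one-hot step; a minimal sketch of what it announces, with to_categorical as an assumption rather than the original continuation:

# Hypothetical continuation: one-hot encode the integer labels.
from tensorflow.keras.utils import to_categorical

y_train = to_categorical(y_train)
y_val = to_categorical(y_val)
y_test = to_categorical(y_test)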
Example #11
File: base.py Project: nptit/kaggle
    def load_preprocessed_data(self):
        preprocessed_data = preprocessing.load(self.params)
        return preprocessed_data
Example #12
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import time
import pandas as pd
from sklearn.naive_bayes import BernoulliNB
from sklearn.decomposition import PCA
import re
import json
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.feature_extraction.text import CountVectorizer
import preprocessing as pp

base_directory = "F:\\code\\python\\lvtn\\"

score_set = []

# 0:NEG, 1:NEU, 2:POS
training_data, test_data, raw_training, raw_test, raw = pp.load()
socal = pd.read_csv(base_directory + "so-cal.csv")
#x = socal[socal.socal<0]['socal'] - 1
#socal.loc[socal.socal<0, 'socal']  = x

lda = LinearDiscriminantAnalysis()
lda.fit(socal['socal'].values.reshape(-1, 1), socal['lab'])
socal['y'] = lda.transform(socal['socal'].values.reshape(-1, 1))


def training_ngram(corpus, min_df=3):
    vectorizer = CountVectorizer(
        min_df=min_df,
        decode_error="ignore",
        analyzer="word",
        lowercase=True,