Пример #1
0
def prediction():
    """Train a Naive Bayes tweet classifier and return its hold-out accuracy.

    Reads ``train.csv`` (columns ``text`` and ``target`` are used), removes
    every character other than ASCII letters and spaces from ``text``, splits
    the rows 70/30 into train and test parts, trains an NLTK Naive Bayes
    classifier on the training part and scores it on the test part.

    Rows with ``target == 0`` are reported as "Fake" and rows with
    ``target == 1`` as "Real" in the printed summary.

    Returns:
        The fraction of test tweets whose predicted label equals ``target``.
    """
    df = pd.read_csv("train.csv")

    # regex=True must be explicit: from pandas 2.0 on, Series.str.replace
    # treats the pattern as a literal string by default, which would leave
    # the text completely uncleaned.
    df['text'] = df['text'].str.replace("[^A-Za-z ]", "", regex=True)
    train, test = train_test_split(df, test_size=0.3)

    tweets = get_cleaned_tweets(train)
    # The return value is not needed here; the call is kept in case the helper
    # has side effects that extract_features relies on -- TODO confirm.
    get_word_features(tweets)

    training_set = nltk.classify.apply_features(extract_features, tweets)
    classifier = nltk.NaiveBayesClassifier.train(training_set)

    test_0 = test.loc[test['target'] == 0, 'text']
    test_1 = test.loc[test['target'] == 1, 'text']

    def count_hits(texts, label):
        # Number of texts the classifier assigns to `label`.
        return sum(
            1 for text in texts
            if classifier.classify(extract_features(text.split())) == label
        )

    fake_cnt = count_hits(test_0, 0)
    real_cnt = count_hits(test_1, 1)

    # Reported as correct/total (the arguments used to be the other way round).
    print('[Fake Tweets]: %s/%s ' % (fake_cnt, len(test_0)))
    print('[Real Tweets]: %s/%s ' % (real_cnt, len(test_1)))
    return (fake_cnt + real_cnt) / (len(test_0) + len(test_1))
Пример #2
0
def prediction_sentiment(sentiment):
    """Classify a single piece of text with a freshly trained Naive Bayes model.

    A classifier is trained on a random 70% sample of ``train.csv`` on every
    call, then applied to the whitespace-split words of ``sentiment``.

    Args:
        sentiment: The text to classify (a string).

    Returns:
        ``"Dangerous"`` when the predicted label is 1,
        ``"Don't Worry it's not Dangerous "`` when it is 0, and
        ``"Neutral"`` for any other label.
    """
    df = pd.read_csv("train.csv")

    # regex=True must be explicit: from pandas 2.0 on, Series.str.replace
    # treats the pattern as a literal string by default, which would leave
    # the text completely uncleaned.
    df['text'] = df['text'].str.replace("[^A-Za-z ]", "", regex=True)

    # Only the training part is used; the split is kept so the model is still
    # trained on a random 70% sample as before.
    train, _ = train_test_split(df, test_size=0.3)

    tweets = get_cleaned_tweets(train)
    # The return value is not needed here; the call is kept in case the helper
    # has side effects that extract_features relies on -- TODO confirm.
    get_word_features(tweets)

    training_set = nltk.classify.apply_features(extract_features, tweets)
    classifier = nltk.NaiveBayesClassifier.train(training_set)

    res = classifier.classify(extract_features(sentiment.split()))
    if res == 1:
        return "Dangerous"
    if res == 0:
        return "Don't Worry it's not Dangerous "
    return "Neutral"
Пример #3
0
def upload():
    """Save the uploaded image(s), classify the last one and render the result.

    Every file in the ``file`` field of the request is stored under
    ``<APP_ROOT>/images/``. The last stored file is passed through
    ``functions.extract_features`` and ``functions.input_visage``; the numeric
    outcome selects the title, description and frequency text handed to the
    ``final.html`` template.

    Side effects:
        Writes files to disk and rebinds the module-level ``List`` to the
        extracted features of the last stored file.

    Returns:
        The rendered ``final.html`` template, or a ``(message, 400)`` pair
        when the request contains no usable file.
    """
    global List

    target = os.path.join(APP_ROOT, 'images/')
    print(target)
    # exist_ok avoids the old behaviour of reporting a failure whenever the
    # directory was already there.
    os.makedirs(target, exist_ok=True)

    incoming = request.files.getlist("file")
    print(incoming)

    filename = None
    for uploaded in incoming:
        print(uploaded)
        print("{} is the file name".format(uploaded.filename))
        # The name is supplied by the client: keep only its last path
        # component so it cannot escape the upload directory.
        safe_name = os.path.basename((uploaded.filename or "").replace("\\", "/"))
        if safe_name in ("", ".", ".."):
            continue
        filename = safe_name
        destination = os.path.join(target, filename)
        print("Accept incoming file:", filename)
        print("Save it to:", destination)
        uploaded.save(destination)

    if filename is None:
        # Previously this case crashed with a NameError further down.
        # NOTE(review): assumes the web framework accepts a (body, status)
        # return value -- confirm.
        return "No file uploaded", 400

    List = functions.extract_features(os.path.join(target, filename))
    Output = functions.input_visage(List)

    if Output == 1:
        result = 'Trisomy 21 founded'
        description = 'Down syndrome, also known as trisomy 21, is a genetic disorder caused by the presence of all or part of a third copy of chromosome 21. It is usually associated with physical growth delays, mild to moderate intellectual disability, and characteristic facial features'
        frec = 'Down syndrome occurs in about 1 in 800 newborns. About 5,300 babies with Down syndrome are born in the United States each year, and approximately 200,000 people in this country have the condition. Although women of any age can have a child with Down syndrome, the chance of having a child with this condition increases as a woman gets older.'
    elif Output == 2:
        result = 'Angel man Syndrome founded'
        description = 'Angelman syndrome is a complex genetic disorder that primarily affects the nervous system. Characteristic features of this condition include delayed development, intellectual disability, severe speech impairment, and problems with movement and balance (ataxia). Most affected children also have recurrent seizures (epilepsy) and a small head size (microcephaly). Delayed development becomes noticeable by the age of 6 to 12 months, and other common signs and symptoms usually appear in early childhood.'
        frec = 'Angelman syndrome affects an estimated 1 in 12,000 to 20,000 people.'
    elif Output == 3:
        result = 'Williams Syndrome founded'
        description = 'Williams syndrome is a developmental disorder that affects many parts of the body. This condition is characterized by mild to moderate intellectual disability or learning problems, unique personality characteristics, distinctive facial features, and heart and blood vessel (cardiovascular) problems. \n People with Williams syndrome typically have difficulty with visual-spatial tasks such as drawing and assembling puzzles, but they tend to do well on tasks that involve spoken language, music, and learning by repetition (rote memorization). Affected individuals have outgoing, engaging personalities and tend to take an extreme interest in other people. Attention deficit disorder (ADD), problems with anxiety, and phobias are common among people with this disorder.'
        frec = 'Williams syndrome affects an estimated 1 in 7,500 to 10,000 people.'
    else:
        result = 'No Syndrome founded'
        description = 'No description '
        frec = 'Normal person'

    return render_template("final.html",
                           name=result,
                           desc=description,
                           desc2=frec,
                           image_name=filename)
Пример #4
0
# Feature-extraction settings. `color_space`, `orient` and `cars` are used
# below but are defined outside this snippet.
pix_per_cell = 6          # HOG pixels per cell
cell_per_block = 2        # HOG cells per block
hog_channel = 'ALL'       # Can be 0, 1, 2, or "ALL"
spatial_size = (64, 64)   # Spatial binning dimensions
hist_bins = 32            # Number of histogram bins
spatial_feat = True       # Spatial features on or off (original note: LAB)
hist_feat = True          # Histogram features on or off (original note: HSV)
hog_feat = True           # HOG features on or off (original note: YUV)

# All settings gathered in one place and forwarded as keyword arguments.
car_feature_settings = dict(
    color_space=color_space,
    spatial_size=spatial_size,
    hist_bins=hist_bins,
    orient=orient,
    pix_per_cell=pix_per_cell,
    cell_per_block=cell_per_block,
    hog_channel=hog_channel,
    spatial_feat=spatial_feat,
    hist_feat=hist_feat,
    hog_feat=hog_feat,
)

print('Extracting Car Features...')
car_features = extract_features(cars, **car_feature_settings)

print('\nExtracting NotCar Features...')

notcar_features = extract_features(notcars,
                                   color_space=color_space,
                                   spatial_size=spatial_size,
                                   hist_bins=hist_bins,
                                   orient=orient,
                                   pix_per_cell=pix_per_cell,
                                   cell_per_block=cell_per_block,
Пример #5
0
# Split each class into train / test parts: the first 4000 examples are used
# for training, the remainder for testing.
train_pos = all_pos[:4000]
test_pos = all_pos[4000:]
train_neg = all_neg[:4000]
# Fixed: this used to slice all_pos, so the "negative" test examples were
# actually positive ones paired with zero labels.
test_neg = all_neg[4000:]

train_x = train_pos + train_neg
test_x = test_pos + test_neg

# Column vectors of labels: ones for the positive examples followed by zeros
# for the negative examples, in the same order as train_x / test_x.
train_y = np.append(np.ones((len(train_pos), 1)), np.zeros((len(train_neg), 1)), axis=0)
test_y = np.append(np.ones((len(test_pos), 1)), np.zeros((len(test_neg), 1)), axis=0)

freqs = build_freqs(train_x, train_y)

# One row of three features per training example.
X = np.zeros((len(train_x), 3))
for i, example in enumerate(train_x):
    X[i, :] = extract_features(example, freqs)

# training labels corresponding to X
Y = train_y

# Apply gradient descent
J, theta = gradientDescent(X, Y, np.zeros((3, 1)), 1e-9, 1500)
print(f"The cost after training is {J:.8f}.")
print(f"The resulting vector of weights is {[round(t, 8) for t in np.squeeze(theta)]}")


tmp_accuracy = test_logistic_regression(test_x, test_y, freqs, theta)
print(f"Logistic regression model's accuracy = {tmp_accuracy:.4f}")


Пример #6
0
# Feature-extraction settings. `colorspace`, `orient`, `cars` and `notcars`
# are used below but are defined outside this snippet.
pix_per_cell = 8
cell_per_block = 1
hog_channel = "ALL"  # Can be 0, 1, 2, or "ALL"
spatial = 32
histbin = 32
spatial_feat = True
hist_feat = True
hog_feat = True

# Extract image features
t = time.time()

# Keyword arguments shared by every extract_features call below.
feature_kwargs = dict(
    cspace=colorspace,
    spatial_size=(spatial, spatial),
    hist_bins=histbin,
    hist_range=(0, 256),
    orient=orient,
    pix_per_cell=pix_per_cell,
    cell_per_block=cell_per_block,
    hog_channel=hog_channel,
    spatial_feat=spatial_feat,
    hist_feat=hist_feat,
    hog_feat=hog_feat,
)

X = []

# Car images first ...
for idx, file in enumerate(cars):
    car_features = extract_features(file, **feature_kwargs)
    X.append(car_features)
    if not idx % 100:
        print(idx)  # progress marker every 100 images

# ... then the non-car images, appended to the same list.
for idx, file in enumerate(notcars):
    notcar_features = extract_features(file, **feature_kwargs)
    X.append(notcar_features)
    if not idx % 100:
        print(idx)  # progress marker every 100 images

X = np.array(X)
Пример #7
0
    def hog_classify(self,
                     cars,
                     notcars,
                     color_space='YCrCb',
                     orient=7,
                     pix_per_cell=8,
                     cell_per_block=2,
                     hog_channel=0,
                     spatial_size=(32, 32),
                     sample_size=None,
                     hist_bins=32,
                     spatial_feat=True,
                     hist_feat=True,
                     hog_feat=True,
                     hist_range=None):
        """
        Train a linear SVC on features extracted from car / non-car samples.

        Features are produced by ``functions.extract_features``, standardised
        with a ``StandardScaler`` and split 80/20 into train and test sets.
        The fitted classifier and scaler are stored on ``self.svc`` and
        ``self.X_scaler``; ``self.C`` is read as the SVC regularisation value.
        Timing and accuracy information is printed along the way.

        :param cars: car samples handed to ``functions.extract_features``
        :param notcars: non-car samples handed to ``functions.extract_features``
        :param color_space: forwarded to the extractor
            (original note: can be RGB, HSV, LUV, HLS, YUV, YCrCb)
        :param orient: forwarded to the extractor
        :param pix_per_cell: forwarded to the extractor
        :param cell_per_block: forwarded to the extractor
        :param hog_channel: forwarded to the extractor
            (original note: can be 0, 1, 2, or "ALL")
        :param spatial_size: forwarded to the extractor
        :param sample_size: when not None, only the first ``sample_size``
            entries of ``cars`` and ``notcars`` are used
            (original note: HOG features are slow to compute)
        :param hist_bins: forwarded to the extractor
        :param spatial_feat: forwarded to the extractor
        :param hist_feat: forwarded to the extractor
        :param hog_feat: forwarded to the extractor
        :param hist_range: forwarded to the extractor
        :return: the number of test samples requested for the timing
            prediction (always 100)
        """
        if sample_size is not None:
            cars = cars[:sample_size]
            notcars = notcars[:sample_size]

        # Both classes are extracted with exactly the same settings.
        extractor_settings = dict(
            color_space=color_space,
            orient=orient,
            pix_per_cell=pix_per_cell,
            cell_per_block=cell_per_block,
            hog_channel=hog_channel,
            spatial_size=spatial_size,
            hist_bins=hist_bins,
            spatial_feat=spatial_feat,
            hist_feat=hist_feat,
            hog_feat=hog_feat,
            hist_range=hist_range,
        )

        started = time.time()
        car_features = functions.extract_features(cars, **extractor_settings)
        notcar_features = functions.extract_features(notcars,
                                                     **extractor_settings)
        finished = time.time()
        print(round(finished - started, 2), 'Seconds to extract HOG features...')

        # Diagnostic output about the extracted car features
        print(len(car_features))
        print(np.array(car_features).shape)

        # Stack car rows on top of non-car rows
        X = np.vstack((car_features, notcar_features)).astype(np.float64)

        # Per-column scaler, fitted on (and then applied to) all samples.
        # NOTE(review): the scaler sees the test rows too, because it is
        # fitted before the train/test split below -- confirm this is intended.
        X_scaler = StandardScaler().fit(X)
        scaled_X = X_scaler.transform(X)

        # Optional visual check of raw vs. normalised features; switched off.
        draw = False
        if draw:
            car_ind = np.random.randint(0, len(cars))
            fig = plt.figure(figsize=(12, 4))
            plt.subplot(131)
            plt.imshow(mpimg.imread(cars[car_ind]))
            plt.title('Original Image')
            plt.subplot(132)
            plt.plot(X[car_ind])
            plt.title('Raw Features')
            plt.subplot(133)
            plt.plot(scaled_X[car_ind])
            plt.title('Normalized Features')
            fig.tight_layout()

        # Labels: 1 for every car row, 0 for every non-car row
        car_labels = np.ones(len(car_features))
        notcar_labels = np.zeros(len(notcar_features))
        y = np.hstack((car_labels, notcar_labels))

        # Randomised 80/20 train/test split with a randomly drawn seed
        rand_state = np.random.randint(0, 100)
        X_train, X_test, y_train, y_test = train_test_split(
            scaled_X, y, test_size=0.2, random_state=rand_state)

        print('Using:', orient, 'orientations', pix_per_cell,
              'pixels per cell and', cell_per_block, 'cells per block')
        print('Feature vector length:', len(X_train[0]))

        # Fit a linear SVC and report how long training took
        svc = LinearSVC(C=self.C)
        started = time.time()
        svc.fit(X_train, y_train)
        finished = time.time()
        print(round(finished - started, 2), 'Seconds to train SVC...')

        # Accuracy on the held-out part
        print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))

        # Time a prediction over the first n_predict test samples
        started = time.time()
        n_predict = 100
        print('My SVC predicts: ', svc.predict(X_test[:n_predict]))
        print('For these', n_predict, 'labels: ', y_test[:n_predict])
        finished = time.time()
        print(round(finished - started, 5), 'Seconds to predict', n_predict,
              'labels with SVC')

        # Keep the fitted model and scaler for later use
        self.svc = svc
        self.X_scaler = X_scaler
        return n_predict