Пример #1
0
def get_matrix(mail):
    """Build a word-count feature vector for ``mail`` over the stored lexicon.

    The lexicon is unpickled from ``data/psocols.txt``. Each element of
    ``mail`` is lower-cased and, if it appears in the lexicon, the counter at
    the lexicon position of its first occurrence is incremented.

    Args:
        mail: iterable of objects exposing ``.lower()`` (presumably word
            strings produced by the mail pre-processing step -- confirm
            against the caller).

    Returns:
        A one-dimensional ``numpy.float32`` array with one entry per lexicon
        element, holding the occurrence counts.
    """
    lexicon_path = get_full_path('data/psocols.txt')
    # NOTE(security): pickle.load can execute arbitrary code; this is only
    # safe while the lexicon file is a trusted, locally produced artifact.
    with open(lexicon_path, 'rb') as f:
        lexicon = pickle.load(f)

    # Index the lexicon once instead of scanning it twice per token
    # (``in`` followed by ``.index``). ``setdefault`` keeps the first position
    # of a duplicated entry, which is what ``list.index`` would have returned.
    # Assumes lexicon entries are hashable (e.g. strings).
    positions = {}
    for position, entry in enumerate(lexicon):
        positions.setdefault(entry, position)

    counts = np.zeros(len(lexicon))
    for word in mail:
        position = positions.get(word.lower())
        if position is not None:
            counts[position] += 1

    return np.array(counts, dtype=np.float32)
Пример #2
0
def predict(body):
    """Classify a mail body and return the label mapped to the predicted class.

    The body is pre-processed with ``utils.Process``, encoded with
    ``encoder.get_matrix`` and fed, as a batch of one, to the model stored in
    ``data/model.h5``.

    Args:
        body: raw mail content handed to ``utils.Process``.

    Returns:
        ``predictions.get(<first predicted class>)``; ``None`` when the class
        has no entry in ``predictions`` (assuming ``predictions`` is a dict --
        confirm where it is defined).
    """
    file_content = utils.Process(body)
    mail = file_content.process()
    matrix = np.array([encoder.get_matrix(mail)])

    try:
        model = load_model(get_full_path('data/model.h5'))
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['accuracy'])

        result = model.predict_classes(matrix)
    finally:
        # The model is reloaded on every call, so the session must be cleared
        # even when loading or predicting fails; otherwise state from the
        # failed call is left behind for the next one.
        backend.clear_session()
    return predictions.get(result[0])
Пример #3
0
def upload():
    """Handle the upload page: show the form on GET, classify a file on POST.

    On POST the uploaded file is checked with ``allowed_file`` and
    ``black_checker``; an accepted file is read and passed to ``predict``.
    The rendered ``base.html`` receives the blacklist text and, depending on
    the outcome, the mail content and/or a status message:

    * blacklisted sender -> message only;
    * ``'fraud'``        -> ``update_blacklist`` is called, then content,
      refreshed blacklist and message are rendered;
    * ``'ham'``          -> content is rendered;
    * ``'spam'``         -> message only;
    * anything else, or a missing/disallowed file -> the page is rendered with
      whatever is still unread in the uploaded stream.

    Returns:
        The value of ``render_template('base.html', ...)``.
    """

    def _read_text(path):
        # Read a whole text file and close the handle right away.
        with open(path) as handle:
            return handle.read()

    blacklist = _read_text(get_full_path(blacklist_dir))
    if request.method == 'POST':
        file = request.files['file']
        session.clear()
        if file and allowed_file(file.filename):
            filename = get_file_name(file.filename)
            if black_checker(filename):
                return render_template(
                    'base.html',
                    blacklist=_read_text(blacklist_dir),
                    message='This sender has been blacklisted!!')

            content = file.read()
            detect = predict(content)
            if detect == 'fraud':
                update_blacklist(filename)
                return render_template(
                    'base.html',
                    content=content,
                    # Re-read after update_blacklist so the page shows the
                    # current file contents rather than the earlier snapshot.
                    blacklist=_read_text(blacklist_dir),
                    message=' This is a fraudulent mail.'
                    'Therefore,the mail has been updated on the blacklist')
            if detect == 'ham':
                # The rendered page must be returned; previously the result
                # was dropped and the user got a page with empty content.
                return render_template('base.html',
                                       blacklist=blacklist,
                                       content=content)
            if detect == 'spam':
                return render_template(
                    'base.html',
                    blacklist=blacklist,
                    message=
                    'This mail is spam, therefore it will be sent to spam folder'
                )
            # Unrecognised label: fall through to the generic render below.
            # (A redirect(...) call used to sit here, but its result was
            # discarded, so it never had any effect.)
        return render_template('base.html',
                               blacklist=blacklist,
                               content=file.read())
    else:
        return render_template('base.html', blacklist=blacklist)
Пример #4
0
import os

import nltk
from flask import Flask, request, session
from flask import url_for, redirect, render_template
from flask_bootstrap import Bootstrap

from core import predict
from dir import get_full_path

# Flask application object and its Bootstrap extension.
app = Flask(__name__)

bootstrap = Bootstrap(app)

# SECRET_KEY should not live in source control: take it from the environment
# when provided and fall back to the historical literal so existing
# deployments keep working unchanged.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'hard to guess string')

# File extensions accepted for upload (presumably consulted by allowed_file,
# which is not visible here).
ALLOWED_EXTENSIONS = ['txt', 'csv']
# Resolved location of the blacklist text file.
blacklist_dir = get_full_path('data/blacklist.txt')


@app.route('/', methods=['GET', 'POST'])
def upload():
    blacklist = open(get_full_path(blacklist_dir)).read()
    if request.method == 'POST':
        file = request.files['file']
        session.clear()
        if file and allowed_file(file.filename):
            filename = get_file_name(file.filename)
            if black_checker(filename):
                return render_template(
                    'base.html',
                    blacklist=open(blacklist_dir).read(),
                    message='This sender has been blacklisted!!')
            else:
Пример #5
0
from dir import get_full_path
from nltk.corpus import words


# Lazily built cache of the NLTK word list; populated on the first check().
_WORD_SET = None


def check(word):
    """Return True if ``word`` is present in the NLTK ``words`` corpus.

    The corpus list is converted to a frozenset once and reused, instead of
    being rebuilt and scanned linearly on every call.

    Args:
        word: the token to look up (must be hashable, e.g. a string). The
            comparison is exact; no case folding is applied.

    Returns:
        bool: whether ``word`` is one of the corpus entries.
    """
    global _WORD_SET
    if _WORD_SET is None:
        _WORD_SET = frozenset(words.words())
    return word in _WORD_SET


def worded(text):
    """Split ``text`` into lower-cased runs of word characters.

    Returns a list with every non-overlapping match of ``\\w+`` in the
    lower-cased text, in order of appearance.
    """
    lowered = text.lower()
    return [match.group() for match in re.finditer(r'\w+', lowered)]


# Word-frequency table built once at import time from data/big.txt.
# The file is opened in a ``with`` block so the handle is closed as soon as
# the text has been read instead of being left to the garbage collector.
with open(get_full_path('data/big.txt')) as _corpus_file:
    WORDS = Counter(worded(_corpus_file.read()))


def P(word, N=sum(WORDS.values())):
    """Return the relative frequency of ``word`` in ``WORDS``.

    ``N`` defaults to the sum of all counts in ``WORDS``, evaluated once when
    this function is defined.
    """
    occurrences = WORDS[word]
    return occurrences / N


def correction(word):
    """Return the candidate for ``word`` that scores highest under ``P``."""
    options = candidates(word)
    best = max(options, key=P)
    return best


def candidates(word):
    """Generate possible spelling corrections for word."""
    return (known([word]) or known(edits1(word)) or known(edits2(word))