Пример #1
0
def predict_url(url):
    """Predict labels for a URL or for already-prepared feature data.

    Args:
        url: If a ``str``, features are extracted from it with
            ``mod_feature_extraction.extract_features_from_URL`` and wrapped
            in a ``pandas.DataFrame``. Any other value is used as the feature
            data directly.

    Returns:
        The model's predictions converted with ``.tolist()``, or ``None`` if
        any step raised an exception (the failure is printed and logged).
    """
    try:
        if isinstance(url, str):
            features = mod_feature_extraction.extract_features_from_URL(url, "PREDICT", True)
            x_pred = pd.DataFrame(features)
        else:
            x_pred = url

        x_pred = transform_data(x_pred)

        random_forest = load_model()
        y_pred = random_forest.predict(x_pred)

        return y_pred.tolist()

    except Exception as e:
        # Local import: the file's import block is not visible from here.
        import traceback

        # Report the line inside this function at which the failure surfaced.
        tb = e.__traceback__
        f = tb.tb_frame
        lineno = tb.tb_lineno
        filename = f.f_code.co_filename
        linecache.checkcache(filename)
        line = linecache.getline(filename, lineno, f.f_globals)
        print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(filename, lineno, line.strip(), e))
        log(action_logging_enum=WARNING, logging_text=str(e))
        # str(e.__traceback__) only yields "<traceback object at 0x...>";
        # log the formatted stack trace instead so the entry is actionable.
        log(action_logging_enum=WARNING, logging_text=traceback.format_exc())
        return None
Пример #2
0
def predict_url(url):
    """Classify a URL with the pre-loaded adaptive boosting model.

    Features are extracted from ``url``, the "Label", "ID" and "URL" columns
    are dropped, and the first prediction is mapped to a text verdict.

    Args:
        url: The URL to classify.

    Returns:
        "Benign" when the first prediction stringifies to "0", "Phish" when
        it stringifies to "1", and "NO RESULT" otherwise. If any step raises,
        the exception is logged and the integer ``0`` is returned.
    """
    try:
        features = mod_feature_extraction.extract_features_from_URL(
            url, "PREDICT", True)
        x_pred = pd.DataFrame(features)
        x_pred = x_pred.drop(["Label", "ID", "URL"], axis=1)
        y_pred = adaptive_boosting_pre_loaded.predict(x_pred)
        result = "NO RESULT"

        if str(y_pred[0]) == "0":
            result = "Benign"

        if str(y_pred[0]) == "1":
            result = "Phish"

    except Exception as e:
        # Local import: the file's import block is not visible from here.
        import traceback

        # Report the line inside this function at which the failure surfaced.
        tb = e.__traceback__
        f = tb.tb_frame
        lineno = tb.tb_lineno
        filename = f.f_code.co_filename
        linecache.checkcache(filename)
        line = linecache.getline(filename, lineno, f.f_globals)
        log(
            INFO, 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(
                filename, lineno, line.strip(), e))
        log(action_logging_enum=WARNING, logging_text=str(e))
        # str(e.__traceback__) only yields "<traceback object at 0x...>";
        # log the formatted stack trace instead so the entry is actionable.
        log(action_logging_enum=WARNING, logging_text=traceback.format_exc())
        return 0

    log(action_logging_enum=INFO,
        logging_text="[ADAPTIVE BOOSTING]: Result for URL [{u}] = {l}".format(
            u=str(url), l=result))
    return result
Пример #3
0
def predict_url(url):
    """Classify ``url`` with the model returned by ``load_model`` and log it.

    Returns "Benign" when the first prediction stringifies to "0", "Phish"
    when it stringifies to "1", and "NO RESULT" for anything else.
    """
    model = load_model()
    feature_frame = pd.DataFrame(extract_features_from_URL(url, "PREDICT", True))
    predictions = model.predict(transform_data(feature_frame))

    # Map the stringified first prediction onto its human-readable verdict.
    verdicts = {"0": "Benign", "1": "Phish"}
    verdict = verdicts.get(str(predictions[0]), "NO RESULT")

    message = "[SUPPORT VECTOR MACHINE]: Result for URL [{u}] = {l}".format(u=url, l=verdict)
    log(action_logging_enum=INFO, logging_text=message)
    return verdict
Пример #4
0
def predict_url(url):
    """Classify ``url`` with the model returned by ``load_model`` and log it.

    Returns "Benign" when the first prediction stringifies to "0", "Phish"
    when it stringifies to "1", and "NO RESULT" for anything else.
    """
    classifier = load_model()
    raw_features = extract_features_from_URL(url, "PREDICT", True)
    prepared = transform_data(pd.DataFrame(raw_features))
    first_label = str(classifier.predict(prepared)[0])

    if first_label == "0":
        outcome = "Benign"
    elif first_label == "1":
        outcome = "Phish"
    else:
        outcome = "NO RESULT"

    log(
        action_logging_enum=INFO,
        logging_text="[LOGISTIC REGRESSION]: Result for URL [{u}] = {l}".format(u=url, l=outcome),
    )
    return outcome
Пример #5
0
def predict_url(url):
    """Classify ``url`` with the model returned by ``load_model`` and log it.

    Args:
        url: The URL to classify.

    Returns:
        "Benign" when the first prediction stringifies to "0", "Phish" when
        it stringifies to "1", and "NO RESULT" otherwise.
    """
    knn = load_model()
    x_pred = pd.DataFrame(extract_features_from_URL(url, "PREDICT", True))
    x_pred = transform_data(x_pred)

    y_pred = knn.predict(x_pred)

    result = "NO RESULT"

    if str(y_pred[0]) == "0":
        result = "Benign"

    if str(y_pred[0]) == "1":
        result = "Phish"

    log(action_logging_enum=INFO,
        logging_text="[K-NEAREST NEIGHBOR]: Result for URL [{u}] = {l}".format(
            u=url, l=result))
    # The verdict was computed and logged but never returned, so callers
    # always received None; return it like the sibling classifiers do.
    return result
Пример #6
0
def predict_url(url):
    """Classify ``url`` with the model returned by ``load_model`` and log it.

    Returns "Benign" when the first prediction stringifies to "0", "Phish"
    when it stringifies to "1", and "NO RESULT" for anything else.
    """
    tree_model = load_model()
    frame = pd.DataFrame(extract_features_from_URL(url, "PREDICT", True))
    frame = transform_data(frame)
    predicted = tree_model.predict(frame)

    top = str(predicted[0])
    label = "Benign" if top == "0" else ("Phish" if top == "1" else "NO RESULT")

    text = "[DECISION TREE]: Result for URL [{u}] = {l}".format(u=url, l=label)
    log(action_logging_enum=INFO, logging_text=text)
    return label
Пример #7
0
def predict_url(url):
    """Classify a URL with the pre-loaded extreme gradient boosting model.

    Features are extracted from ``url``, boolean columns are cast to ``int``,
    the "Label", "ID", "URL" and "Final URL" columns are dropped, and the
    first prediction is mapped to an integer verdict.

    Args:
        url: The URL to classify.

    Returns:
        ``1`` when the first prediction stringifies to "1", otherwise ``0``.
        ``0`` is also returned when any step raises (the exception is logged).
    """
    try:
        features = mod_feature_extraction.extract_features_from_URL(
            url, "PREDICT", True)
        x_pred = pd.DataFrame(features)

        # Compare against the builtin ``bool``: the ``np.bool`` alias was
        # removed in NumPy 1.24, and the resulting AttributeError would be
        # swallowed by the handler below, making every call return 0.
        for col in x_pred.columns:
            if x_pred[col].dtype == bool:
                x_pred[col] = x_pred[col].astype(int)

        x_pred = x_pred.drop(["Label", "ID", "URL", "Final URL"], axis=1)
        y_pred = extreme_gradient_pre_loaded.predict(x_pred)

        result = "NO RESULT"
        int_result = 0

        if str(y_pred[0]) == "0":
            result = "Benign"

        if str(y_pred[0]) == "1":
            result = "Phish"
            int_result = 1
    except Exception as e:
        # Local import: the file's import block is not visible from here.
        import traceback

        # Report the line inside this function at which the failure surfaced.
        tb = e.__traceback__
        f = tb.tb_frame
        lineno = tb.tb_lineno
        filename = f.f_code.co_filename
        linecache.checkcache(filename)
        line = linecache.getline(filename, lineno, f.f_globals)
        log(
            ERROR, 'EXCEPTION IN ({}, LINE {} "{}"): {}'.format(
                filename, lineno, line.strip(), e))
        log(action_logging_enum=WARNING, logging_text=str(e))
        # str(e.__traceback__) only yields "<traceback object at 0x...>";
        # log the formatted stack trace instead so the entry is actionable.
        log(action_logging_enum=WARNING, logging_text=traceback.format_exc())
        return 0

    log(action_logging_enum=INFO,
        logging_text="[EXTREME GRADIENT BOOSTING]: Result for URL [{u}] = {l}".
        format(u=str(url), l=result))

    return int_result
Пример #8
0
def predict_url(url, proba=False):
    """Predict with the pre-loaded random forest on a fixed feature subset.

    Args:
        url: If a ``str``, features are extracted from it with
            ``mod_feature_extraction.extract_features_from_URL`` and wrapped
            in a ``pandas.DataFrame``. Any other value is used as the feature
            data directly and must support selection by the column names
            listed below.
        proba: When true, call the model's ``predict_proba`` instead of
            ``predict``.

    Returns:
        Whatever the model's ``predict`` / ``predict_proba`` returns, or
        ``None`` if any step raised an exception (the failure is printed and
        logged).
    """
    try:
        if isinstance(url, str):
            features = mod_feature_extraction.extract_features_from_URL(
                url, "PREDICT", True)
            x_pred = pd.DataFrame(features)
        else:
            x_pred = url

        # Keep only the columns passed to the model, in this exact order.
        x_pred = x_pred[[
            'Entropy', 'Ratio Netloc/URL', 'Length URL', 'Ratio Digit/Letter',
            'Ratio Path/URL', 'Has HTTPS', 'Length Netloc', 'KL Divergence',
            'Ratio Vowel/Consonant', 'Number Symbols', 'Number Dots',
            'Number Tokens Netloc', 'Number Digits Path', 'Ratio Cap/NonCap',
            'Number Dash', 'Number Dash Netloc', 'Has Token Netloc',
            'Number Slash Path', 'Ratio Query/URL', 'Number Digits Netloc',
            'Number Redirects', 'Number PhishyTokens Path',
            'Has Digits Netloc', 'Number Query Parameters',
            'Number Dots Netloc', 'Has Query', 'Number Equals',
            'Number Semicolon', 'Number Ampersand', 'Cert Created Shortly',
            'Number Stars'
        ]]
        if proba:
            y_pred = random_forest_pre_loaded.predict_proba(x_pred)
        else:
            y_pred = random_forest_pre_loaded.predict(x_pred)

        return y_pred

    except Exception as e:
        # Local import: the file's import block is not visible from here.
        import traceback

        # Report the line inside this function at which the failure surfaced.
        tb = e.__traceback__
        f = tb.tb_frame
        lineno = tb.tb_lineno
        filename = f.f_code.co_filename
        linecache.checkcache(filename)
        line = linecache.getline(filename, lineno, f.f_globals)
        print('EXCEPTION IN ({}, LINE {} "{}"): {}'.format(
            filename, lineno, line.strip(), e))
        log(action_logging_enum=WARNING, logging_text=str(e))
        # str(e.__traceback__) only yields "<traceback object at 0x...>";
        # log the formatted stack trace instead so the entry is actionable.
        log(action_logging_enum=WARNING, logging_text=traceback.format_exc())
        return None