Exemplo n.º 1
0
def calculate_features():
    """Compute the raw feature table for every URL in the training set.

    Reads ``TRAINING_SET_FILE_PATH`` (one ``url,is_phishing`` record per
    line) and writes ``RAW_DATA_FILE_PATH`` as CSV: a header line followed by
    one row per URL holding the label and the raw result of each detection
    module. When ``can_access`` returns ``UNKNOWN`` the page cannot be
    analysed, so the six page-based columns are filled with ``"U"``.

    Blank lines in the training set are skipped. Each output row is
    assembled in memory and written with a single ``write`` call, so an
    exception raised by a detection module never leaves a truncated row.

    Raises:
        IndexError: if a non-blank training line has no ``,`` separator.
    """
    header = (
        "is_phishing,is_masquerading,can_access,html_has_same_domain,"
        "has_password_field,uses_stylesheet_naver,check_title,"
        "has_correct_favicon,check_post_action\n"
    )

    # NOTE: the original author reported that opening this path fails when
    # the function is called from a different working directory.
    with open(RAW_DATA_FILE_PATH, "w") as raw_data_file:
        raw_data_file.write(header)

        with open(TRAINING_SET_FILE_PATH, "r") as training_set:
            for line in training_set:
                line = line.rstrip()
                if not line:
                    # A trailing or stray empty line carries no record.
                    continue

                fields = line.split(",")
                url = fields[0]
                is_phishing = fields[1]

                row = [is_phishing]

                result, _mod = is_masquerading(url)
                row.append(result)

                result, resp, _mod = can_access(url)
                row.append(result)

                # If the web page cannot be accessed, the remaining modules
                # have no response to inspect.
                if result != UNKNOWN:
                    # The tuple is evaluated left to right, so the modules
                    # run in the same order as the header columns.
                    for result, _mod in (
                        html_has_same_domain(url, resp),
                        has_password_field(resp),
                        uses_stylesheet_naver(resp),
                        check_title(url, resp),
                        has_correct_favicon(url, resp),
                        check_post_action(resp),
                    ):
                        row.append(result)
                else:
                    row.extend(["U"] * 6)

                raw_data_file.write(",".join(row) + "\n")
Exemplo n.º 2
0
def integrate(url):
    """Classify *url* by running the detection modules in a fixed order.

    The first module that returns something other than ``UNKNOWN`` decides
    the verdict. ``is_masquerading`` runs first on the URL alone; the
    remaining modules need the response from ``can_access`` and are skipped
    when that call returns ``UNKNOWN``. If no module decides, the verdict is
    ``"S"``.

    The deciding module name and the verdict are always passed to
    ``finish_check`` before returning.

    Returns:
        The verdict produced by the deciding module, or ``"S"`` when the
        page is inaccessible or every module returned ``UNKNOWN``.
    """
    print("Checking:\t" + url)

    verdict, module = is_masquerading(url)
    if verdict != UNKNOWN:
        finish_check(module, verdict)
        return verdict

    access, resp, _ = can_access(url)
    if access != UNKNOWN:
        # Deferred calls: each module runs only if all earlier ones were
        # inconclusive.
        page_checks = (
            lambda: html_has_same_domain(url, resp),
            lambda: has_password_field(resp),
            lambda: uses_stylesheet_naver(resp),
            lambda: check_title(url, resp),
            lambda: has_correct_favicon(url, resp),
            lambda: check_post_action(resp),
        )
        for run_check in page_checks:
            verdict, module = run_check()
            if verdict != UNKNOWN:
                break
        else:
            # Every page-based module was inconclusive.
            verdict, module = "S", "NOTHING"
    else:
        verdict, module = "S", "PAGE INACCESSIBLE"

    finish_check(module, verdict)
    return verdict
Exemplo n.º 3
0
def integrate(url):
    """Classify *url* by running the detection modules in a fixed order.

    The first module that returns something other than ``"U"`` decides the
    verdict, and its name is printed as ``Detect By  <module>``.
    ``is_masquerading`` runs first on the URL alone; the remaining modules
    need the response from ``can_access`` and are skipped when that call
    returns ``"U"``.

    The ``print`` calls use a single pre-formatted string so the function
    runs on both Python 2 and Python 3 with the same output the original
    ``print "Detect By ", mod`` statements produced.

    NOTE: no POST-action check is performed in this variant; the call was
    commented out in the original code.

    Returns:
        The verdict of the deciding module, or ``"S"`` when the page cannot
        be accessed or every module returned ``"U"``.
    """
    r, mod = is_masquerading(url)
    if r != "U":
        print("Detect By  %s" % (mod,))
        return r

    r, resp, mod = can_access(url)
    if r != "U":
        # Deferred calls: each module runs only if all earlier ones were
        # inconclusive.
        page_checks = (
            lambda: html_has_same_domain(url, resp),
            lambda: has_password_field(resp),
            lambda: uses_stylesheet_naver(resp),
            lambda: check_title(url, resp),
            lambda: favicon(url, resp),
        )
        for run_check in page_checks:
            r, mod = run_check()
            if r != "U":
                print("Detect By  %s" % (mod,))
                return r
        print("Detect By * Nothing! *")
        return "S"

    # can_access was inconclusive, so the page could not be analysed.
    print("Detect By  * can_access FAILED *")
    return "S"
Exemplo n.º 4
0
def calculate_features():
    """Build the numerically encoded feature table for the training set.

    Reads ``TRAINING_SET`` (first line skipped as a header, then one
    ``url,is_phishing`` record per line) and writes ``TRAINING_SET_ANALYZED``
    as CSV with the columns ``url, is_phishing, is_masquerading,
    html_has_same_domain, has_password_field, check_post_action``.

    Module results are encoded as ``U`` -> 0, ``SL`` -> -0.5, ``S`` -> -1,
    ``PL`` -> 0.5, ``P`` -> 1; any other value is encoded as 0. URLs for
    which ``can_access`` returns ``"U"`` are left out of the output and
    noted in ``LOG``. The ``uses_stylesheet_naver``, ``check_title`` and
    ``has_correct_favicon`` modules are not computed in this variant.

    Progress is appended to ``LOG`` and printed for every URL. Any exception
    raised while processing a record is appended to ``ERROR_LOG`` and the
    run continues with the next record. Each output row is assembled in
    memory and written with one ``write`` call, so a failing module never
    leaves a partial row in the output file.
    """
    encoding = {"U": "0", "SL": "-0.5", "S": "-1", "PL": "0.5", "P": "1"}

    def encode(result):
        # Unrecognised results fall back to "0", the same as "U".
        return encoding.get(result, "0")

    def log_progress(message):
        # Re-opened in append mode per message so progress reaches disk
        # even if the run is interrupted.
        with open(LOG, "a+") as log:
            log.write(message + "\n")
            print(message)

    # NOTE: the original author reported that opening this path fails when
    # the function is called from a different working directory.
    with open(TRAINING_SET_ANALYZED, "w") as ts_analyzed:
        ts_analyzed.write(
            "url,is_phishing,is_masquerading,html_has_same_domain,has_password_field,check_post_action\n"
        )

        with open(TRAINING_SET, "r") as training_set:
            next(training_set, None)  # skip the header line
            counter = 0
            for line in training_set:
                url = ""
                try:
                    fields = line.rstrip().split(",")
                    url = fields[0]
                    is_phishing = fields[1]

                    counter += 1
                    log_progress("Count " + str(counter) + ": " + url)

                    # If the web page cannot be accessed, the other modules
                    # have no response to inspect.
                    result, resp, _mod = can_access(url)
                    if result == "U":
                        log_progress("can_access error")
                        continue

                    row = [url, is_phishing]

                    result, _mod = is_masquerading(url)
                    row.append(encode(result))

                    result, _mod = html_has_same_domain(url, resp)
                    row.append(encode(result))

                    result, _mod = has_password_field(resp)
                    row.append(encode(result))

                    result, _mod = check_post_action(resp)
                    row.append(encode(result))

                    ts_analyzed.write(",".join(row) + "\n")
                except Exception as exc:
                    # Best effort: record the failure and move on. write()
                    # needs text, so the exception is rendered with repr().
                    with open(ERROR_LOG, "a+") as error_log:
                        error_log.write("Count " + str(counter) + ": " + url +
                                        "\n")
                        error_log.write(repr(exc) + "\n")