예제 #1
0
def default_friendly_and_malware():
    """
    Sample run that vectorises the malware APK sample folder.

    The friendly and malware sample folders and the API 22 parsed PScout
    CSV are resolved through ``get_data_directory``; only the malware
    folder is then passed to ``make_apk_vector_folder`` (with
    ``is_malware=True``).  Nothing is returned.
    """
    # The friendly folder is still resolved, although its vectorisation is
    # currently switched off.
    benign_dir = get_data_directory("friendly_apk")
    malicious_dir = get_data_directory("malware_apk")

    api_csv = get_data_directory(
        "training_data", "API_22", "API_22_parsed_api.csv")

    make_apk_vector_folder(
        malicious_dir,
        api_csv,
        extract_manifest_file=True,
        is_malware=True,
    )
예제 #2
0
def parse_pscout_output(filename, api_lvl="API_22"):
    """
    Convert a raw PScout dump into ``<api_lvl>_parsed_api.csv``.

    The input is cut into sections, each starting at a line that begins
    with ``"Permission:"``.  The permission name is the text after the last
    ``"."`` on that line.  The section body (everything from two lines
    below the header up to the next header) is handed to
    ``get_list_of_apis``; every row it returns is prefixed with the
    permission name and written to the CSV.

    Args:
        filename: path of the PScout text dump to read.
        api_lvl: name of the sub-directory of ``training_data`` (and prefix
            of the CSV file name) to write to.
    """
    output_location = get_data_directory("training_data", api_lvl)
    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(output_location, exist_ok=True)

    output_file = os.path.join(output_location, api_lvl + "_parsed_api.csv")

    # Read the dump once; the line list serves both for locating the
    # section headers and for slicing out the section bodies.
    with open(filename) as pscout_input:
        split_file = pscout_input.read().split("\n")

    section_starts = [
        line_num for line_num, line in enumerate(split_file)
        if line.startswith("Permission:")
    ]
    # Sentinel so that the last section runs to the end of the file.
    section_starts.append(len(split_file))

    results = []
    for start, end in zip(section_starts, section_starts[1:]):
        permission_name = split_file[start].split(".")[-1]
        # start + 2 skips the header line and the line right below it.
        for api_row in get_list_of_apis(split_file[start + 2:end]):
            results.append([permission_name] + list(api_row))

    # Open the output only after parsing succeeded, so a failed run does not
    # leave a truncated CSV behind.  newline="" is what the csv module asks
    # for to avoid doubled line endings on some platforms.
    with open(output_file, "w", newline="") as output:
        out_writer = csv.writer(output)
        out_writer.writerow(
            ["Permission", "Function Name", "Return Value", "Arguments"])
        out_writer.writerows(results)
예제 #3
0
def main():
    """
    Classify one APK named on the command line and print the verdict.

    Steps: parse the command-line arguments, build the feature vector of
    the APK with ``make_apk_vector``, optionally retrain the model, load
    the pickled classifier for the requested API level / model and print
    the values returned by ``make_prediction``.
    """
    argument = parse_arguments()
    apk_path = argument["input_path"]
    api_lvl = argument["API_level"]
    model_name = argument["model_name"]

    pscout_in = get_data_directory("training_data", "API_{}".format(api_lvl),
                                   "API_{}_parsed_api.csv".format(api_lvl))

    vector = make_apk_vector(apk_path, pscout_in, extract_manifest_file=True)

    if argument["retrain"]:
        # Retrain the very model that is loaded below.  Without the API level
        # and model name, training_model falls back to its own defaults and
        # may refresh a different pickle than the one get_classifier opens.
        # training_model joins the words of its model name with "_" for the
        # file name, so the underscores are turned back into spaces here.
        training_model(API_level=api_lvl,
                       feature_selection=argument["feature"],
                       model_name=model_name.replace("_", " "))

    classifier = get_classifier(API_lvl=api_lvl,
                                classfier_type=model_name)

    # The last element of the vector is left out of the prediction input
    # (presumably the label slot, cf. is_malware -- confirm in make_apk_vector).
    result = make_prediction(classifier, vector[:-1])

    print("\n---------------Results for {:40}---------------\n".format(
        os.path.basename(apk_path)))

    print("{:20} : {:.4f} (0=NOT malware, 1=malware)".format(
        "Prediction Result", result["Prediction Result"][0]))
    print("{:20} : {:.4f} (Combined with the result above)".format(
        "Actual Value", result["Confidential Interval"][0]))
예제 #4
0
    def encode(self):
        """
        Pack the API usage of this object into integers, four per permission.

        Every permission key of the PScout table gets a 256-slot bitmap.  A
        slot is switched on when the API stored at that position of the
        permission's list is among the used functions; positions past 255
        are ignored.  The permissions are then walked in the order of
        ``permission_list.txt`` and each bitmap is emitted as four integers
        of 64 bits each (most significant slot first).  Permissions that
        are absent from the PScout table contribute four zeros.

        Returns:
            A flat list of ints, four entries per line of the permission
            list (the text after the final newline is not counted).
        """
        list_path = get_data_directory("permission_metadata", "permission_list.txt")
        with open(list_path) as listing:

            api_table = self.__pscout_readable_results

            # One bitmap (list of "0"/"1" characters) per known permission.
            bitmaps = {}
            for name in api_table:
                bitmaps[name] = ["0"] * 256

            for used in self.__function_used:
                for name in api_table:
                    try:
                        slot = api_table[name].index(used)
                        bitmaps[name][slot] = "1"
                    except (ValueError, IndexError):
                        # Not an API of this permission, or beyond slot 255.
                        continue

            encoded = []
            for permission in listing.read().split("\n")[:-1]:
                bitmap = bitmaps.get(permission)
                if bitmap is None:
                    encoded.extend([0, 0, 0, 0])
                    continue
                for start in range(0, 256, 64):
                    word = "".join(bitmap[start:start + 64])
                    encoded.append(int(word, base=2))

            return encoded
예제 #5
0
def get_classifier(API_lvl=22, classfier_type="Random_Forest"):
    """
    Load the pickled classifier for an API level and classifier type.

    The pickle is looked up as
    ``training_data/API_<API_lvl>/API_<API_lvl>_<classfier_type>`` through
    ``get_data_directory``.

    Args:
        API_lvl: API level used in the folder and file name.
        classfier_type: suffix of the pickle file name.

    Returns:
        The object unpickled from that file.

    Raises:
        AssertionError: if the pickle file does not exist.
    """
    level_folder = "API_{}".format(API_lvl)
    pickle_name = "API_{}_{}".format(API_lvl, classfier_type)
    model_path = get_data_directory("training_data", level_folder, pickle_name)

    model_is_there = os.path.exists(model_path)
    assert model_is_there, "Classifier does not exist. Do you want to train a new one?"

    with open(model_path, "rb") as stored_model:
        return pickle.load(stored_model)
예제 #6
0
def training_model(API_level=22,
                   dump_model=True,
                   load_model=False,
                   model_location=None,
                   full_train=False,
                   feature_selection="All",
                   model_name="Random Forest",
                   **model_param):
    """
    Cross-validate, fit and optionally pickle a classifier.

    The training matrix is read with ``np.loadtxt`` from
    ``training_data/API_<API_level>/API_<API_level>_training_final.txt``;
    its last column is the label.

    Args:
        API_level: API level used in the data / pickle file names.
        dump_model: pickle the fitted classifier next to the training data.
        load_model: start from a pickled classifier instead of a new one.
        model_location: path of that pickle; required when ``load_model``.
        full_train: fit on every sample; otherwise 10% is held out and
            passed to ``generate_roc_curve`` after fitting.
        feature_selection: ``"All"`` (every column but the label),
            ``"Permission Only"`` (first 150 columns) or ``"API Only"``
            (columns from 150 up to the label).
        model_name: ``"Random Forest"`` or ``"Neural Network"``; also used
            (words joined by ``_``) in the pickle file name.
        **model_param: forwarded to the classifier constructor.

    Returns:
        The fitted classifier.

    Raises:
        AssertionError: ``load_model`` is set without ``model_location``.
        ValueError: unknown ``feature_selection``, or unknown ``model_name``
            when a new classifier has to be created.
    """
    if load_model:
        assert not (model_location is
                    None), "You have to tell where the <pickled> model is"

    # Reject unknown options before any data is loaded, instead of failing
    # later on an unassigned local variable.
    if feature_selection not in ("All", "Permission Only", "API Only"):
        raise ValueError(
            "Unknown feature_selection: {!r}".format(feature_selection))
    if not load_model and model_name not in ("Random Forest",
                                             "Neural Network"):
        raise ValueError("Unknown model_name: {!r}".format(model_name))

    data_path = get_data_directory(
        "training_data", "API_{}".format(API_level),
        "API_{}_training_final.txt".format(API_level))

    huge_data = np.loadtxt(data_path)

    if feature_selection == "All":
        X = huge_data[:, :-1]
    elif feature_selection == "Permission Only":
        X = huge_data[:, :150]
    else:  # "API Only"
        X = huge_data[:, 150:-1]

    y = huge_data[:, -1]

    if not full_train:
        X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                            y,
                                                            shuffle=True,
                                                            train_size=0.9)
    else:
        X_train, X_test, Y_train, Y_test = X, None, y, None

    # NOTE(review): this is the (n_samples, n_folds=...) KFold signature;
    # confirm it matches the scikit-learn module imported by this file.
    k_fold = KFold(len(Y_train), n_folds=10, shuffle=True, random_state=0)

    if load_model:
        # Unpickling runs arbitrary code: model_location must be trusted.
        with open(model_location, "rb") as pickle_can:
            clf = pickle.load(pickle_can)
    elif model_name == "Random Forest":
        clf = RandomForestClassifier(**model_param)
    else:  # "Neural Network"
        clf = MLPClassifier(**model_param)

    score_list = cross_val_score(clf,
                                 X_train,
                                 Y_train,
                                 cv=k_fold,
                                 n_jobs=1,
                                 scoring="accuracy")
    print("[Model   Name] ", model_name)
    print("[Trained with] {:5d} samples".format(len(X_train)))
    print("Ten Fold Cross Validation Accuracy : {:.2%} (+/-{:.2%})".format(
        np.mean(score_list), np.std(score_list)))

    clf.fit(X_train, Y_train)

    if not full_train:
        generate_roc_curve(clf, X_test, Y_test)

    if dump_model:
        pickle_to = get_data_directory(
            "training_data", "API_{}".format(API_level),
            "API_{}_{}".format(API_level, "_".join(model_name.split())))

        with open(pickle_to, "wb") as pickle_can:
            pickle.dump(clf, pickle_can)

    return clf
예제 #7
0
def load_dangerous_permissions():
    """
    Return the whitespace-separated entries of
    ``permission_metadata/dangerous_permission_list.txt`` as a list.
    """
    source_path = get_data_directory("permission_metadata", "dangerous_permission_list.txt")

    with open(source_path) as handle:
        contents = handle.read()
        return contents.split()
def load_list_of_permissions():
    """
    Return the whitespace-separated entries of
    ``permission_metadata/permission_list.txt`` as a list.
    """
    source_path = get_data_directory("permission_metadata",
                                     "permission_list.txt")
    with open(source_path) as handle:
        contents = handle.read()
        return contents.split()
예제 #9
0
    sample for dumpy friendly and malware
    '''
    friendly_apks = get_data_directory("friendly_apk")
    malware_apks = get_data_directory("malware_apk")

    pscout_in = get_data_directory("training_data", "API_22",
                                   "API_22_parsed_api.csv")

    # friendly = make_apk_vector_folder(friendly_apks, pscout_in, extract_manifest_file=True, is_malware=False)
    malware = make_apk_vector_folder(malware_apks,
                                     pscout_in,
                                     extract_manifest_file=True,
                                     is_malware=True)

    # dump_to = get_data_directory("training_data", "API_22", "API_22_training.txt")

    # np.savetxt(dump_to, friendly + malware)


if __name__ == "__main__":
    # Script mode: vectorise the APK given as first command-line argument
    # against the API 22 PScout table and print the vector on one line,
    # entries separated by single spaces.
    api_table_csv = get_data_directory(
        "training_data", "API_22", "API_22_parsed_api.csv")
    feature_vector = make_apk_vector(
        sys.argv[1],
        api_table_csv,
        extract_manifest_file=True,
        is_malware=False,
    )

    print(" ".join(map(str, feature_vector)))
    # gc.set_debug(gc.DEBUG_LEAK)
    # make_apk_vector_folder("/Users/jeromemao/Desktop/EECS600/project/data/friendly_apk", "")
예제 #10
0
def main():
    """
    Run the complete command-line analysis of one APK and print a report.

    In order: print the MD5 and SHA1 digests of the file, look the MD5 up
    with ``check`` (signature analysis), run ``apkperm.pl`` and
    ``apkapi.pl`` through perl (permission / API-call analysis), then build
    the feature vector, optionally retrain, load the classifier and print
    its prediction.  ``s(n)`` is called between the stages (presumably a
    sleep alias -- defined outside this function).
    """
    # Local imports: only this function needs them.
    import shutil
    import subprocess

    # NOTE(review): the database credentials are hard-coded.  The handle is
    # not used anywhere below; it is kept so that an unreachable database
    # still aborts the run early, and it is closed when the run ends.
    db = pymysql.connect("localhost","mayank","mayank@25","Majorproject" )
    try:
        print("\n*******************************************************************************************************************************************************")
        print("***************************************************************  Malware Analysis Tool  ***************************************************************")
        print("*******************************************************************************************************************************************************")
        argument = parse_arguments()
        apk_path = argument["input_path"]

        print("\n\nCalculating MD5 Signature.......")
        s(2)
        # Read the APK once and close the handle; both digests use the bytes.
        with open(apk_path, 'rb') as apk_file:
            apk_bytes = apk_file.read()
        md5 = hashlib.md5(apk_bytes).hexdigest()
        print(md5)
        print("\nCalculating SHA1 Signature.......")
        s(2)
        # Computed in-process: no shell pipeline fed with the raw path.
        print(hashlib.sha1(apk_bytes).hexdigest())

        print("\n*******************************************************************************************************************************************************")
        print("*********************************************************  Performing Signature Based Analysis  *******************************************************")
        print("*******************************************************************************************************************************************************\n\n")
        s(7)
        sigMalware = check(md5)
        if sigMalware == 0:
            print("Matching Signatures found in Database")
            print("Not a Malware")
        elif sigMalware == 1:
            print("Matching Signature found in Database")
            print("Definitely Malware")
        else:
            print("No Matching Signatures found in Database")
            print("Signature Based Analysis Failed\n\n")
        s(4)

        print("\n\n\n*******************************************************************************************************************************************************")
        print("*********************************************************  Performing Permission Based Analysis  ******************************************************")
        print("*******************************************************************************************************************************************************")
        print("\n\nGathering Permissions from AndroidManifest.xml for Analysis")
        print("Displaying Results")
        s(5)
        # Argument list instead of a shell string: the path is passed as-is.
        subprocess.call(["perl", "apkperm.pl", apk_path])

        print("\n*******************************************************************************************************************************************************")
        print("*********************************************************  Performing API Calls Based Analysis  *******************************************************")
        print("*******************************************************************************************************************************************************")
        print("\n\nGathering API Calls made by the Application for Analysis")
        print("Displaying Results\n")
        # Remove a stale folder named like the APK without its extension
        # (presumably the unpack folder of an earlier run -- confirm against
        # apkapi.pl).  splitext strips only the final extension; cutting at
        # the first "." of the whole path could hit an unrelated directory.
        stale_dir = os.path.splitext(apk_path)[0]
        if stale_dir != apk_path and os.path.isdir(stale_dir):
            shutil.rmtree(stale_dir, ignore_errors=True)
        s(5)
        subprocess.call(["perl", "apkapi.pl", apk_path])

        api_lvl = argument["API_level"]
        model_name = argument["model_name"]
        pscout_in = get_data_directory("training_data", "API_{}".format(api_lvl), "API_{}_parsed_api.csv".format(api_lvl))

        vector = make_apk_vector(apk_path, pscout_in, extract_manifest_file=True)

        if argument["retrain"]:
            # Retrain the very model that is loaded below; training_model
            # joins the words of its model name with "_" for the file name,
            # so the underscores are turned back into spaces here.
            training_model(API_level=api_lvl,
                           feature_selection=argument["feature"],
                           model_name=model_name.replace("_", " "))

        classifier = get_classifier(API_lvl=api_lvl, classfier_type=model_name)

        # The last element of the vector is not part of the prediction input.
        result = make_prediction(classifier, vector[:-1])
        s(10)
        print("\n*******************************************************************************************************************************************************")
        print("\n************************************************************* Results for " + apk_path + " *************************************************************\n")
        print("*******************************************************************************************************************************************************")
        print("Default Conventions:\t0=NOT malware, 1=malware")
        print("Calculated Value : " + str(result["Confidential Interval"][0]))
        print("{:20} : {:.4f} ".format("Prediction Result", result["Prediction Result"][0]))
        if result["Prediction Result"][0] == 0:
            print("The Supplied APK Does not contain malware. Feel Free to install it")
        else:
            print("The Supplied APK contains malware. Do not install or Share")
    finally:
        db.close()