Exemplo n.º 1
0
def feature_extraction(process_function,
                       outputfilename,
                       delimeter=',',
                       dataset='dataset',
                       extensionList=['.exe', '.dll']):
    """
    Run a feature extractor over every matching file of a dataset directory
    and dump the collected features to a CSV file.

    The file name (up to its first dot) is assumed to be the hash / unique
    identifier of the sample, and the directory that contains the file is
    recorded as its class.

    :param process_function: callable invoked as
        ``process_function(path, delimeter=delimeter)``; it must return a
        dictionary of features, or None to skip the file
    :type process_function: callable
    :param outputfilename: name of the CSV file; it is placed in a ``csv``
        sub-directory next to the file currently being processed
    :type outputfilename: str
    :param delimeter: delimiter forwarded to ``process_function`` and to the
        CSV writer
    :type delimeter: str
    :param dataset: path of the dataset; only directories are processed, a
        regular file is silently ignored
    :type dataset: str
    :param extensionList: file extensions handed to
        ``fileutil.getFilePaths`` to select the files to process
    :type extensionList: list

    :return: None
    """
    # A single regular file is accepted but nothing is done with it.
    if os.path.isfile(dataset):
        return

    if not os.path.isdir(dataset):
        print('File type must be file or directory.')
        return

    sample_paths = fileutil.getFilePaths(dataset, extensionList)

    # Feature dictionaries gathered so far, one per successfully processed file.
    rows = []
    for position, sample_path in enumerate(sample_paths):
        # Name up to the first dot is taken as the hash; the parent directory
        # is used as the class label.
        sample_hash = os.path.basename(sample_path).split('.')[0]
        parent_dir = os.path.dirname(sample_path)

        features = process_function(sample_path, delimeter=delimeter)
        if features is not None:
            features['hash'] = sample_hash
            features['class_id'] = parent_dir
            rows.append(features)

        # Progress line: "<output name>-<index>  -  <total>".
        print(outputfilename + "-" + str(position) + "  -  " +
              str(len(sample_paths)))

        # NOTE(review): the CSV is rewritten on every iteration with all rows
        # collected so far, into the "csv" folder beside the current file --
        # confirm this incremental write is intentional.
        csv_path = parent_dir + os.sep + 'csv' + os.sep + outputfilename
        try:
            output.writeSingleIntoCSVFile(csv_path, rows, delimeter)
        except IOError as ioe:
            print(str(ioe))
            pause()
Exemplo n.º 2
0
def mergeAll(filespath, csvOut, selectedColumn=[]):
    """
    Merge the rows of every CSV file found under ``filespath`` into one CSV.

    Each file returned by ``fileutil.getFilePaths(filespath)`` is read with
    ``csv.DictReader``; the rows of all files are concatenated in that order
    and handed to ``writeIntoCSVFile``.

    :param filespath: location passed to ``fileutil.getFilePaths`` to find
        the input files
    :param csvOut: destination passed to ``writeIntoCSVFile``
    :param selectedColumn: columns to keep; when empty (the default) the rows
        are merged unchanged, otherwise every file's rows are first passed
        through ``narrowDict(rows, selectedColumn)``
    :type selectedColumn: list
    :return: whatever ``writeIntoCSVFile(csvOut, rows)`` returns
    """
    merged_rows = []
    for path in fileutil.getFilePaths(filespath):
        # The context manager closes the file even if parsing raises.
        with open(path) as handle:
            rows = list(csv.DictReader(handle))
        # Truthiness test: the former ``is not []`` identity check compared
        # against a brand-new list and was therefore always True.
        if selectedColumn:
            rows = narrowDict(rows, selectedColumn)
        merged_rows += rows
    return writeIntoCSVFile(csvOut, merged_rows)
Exemplo n.º 3
0
def opcodeExtraction(dir, delimeter=","):
    """
    Write an opcode-sequence file for every ``.asm`` file found under ``dir``.

    For each assembly file the target path is obtained from
    ``fu.fileNameSettings(path, ".opcode", "opcode")`` and printed. Files
    whose target already exists are skipped; otherwise the opcode sequence is
    extracted through ``opcodeUtils`` and written as one delimiter-joined
    string.

    :param dir: directory searched for ``.asm`` files
    :param delimeter: string placed between consecutive opcodes
    :type delimeter: str
    :return: None
    """
    asm_paths = fu.getFilePaths(dir, [".asm"])
    for asm_path in asm_paths:
        _dirname, _filename, target = fu.fileNameSettings(
            asm_path, ".opcode", "opcode")
        print(target)
        # Only generate the opcode file when it is not there yet.
        if not os.path.isfile(target):
            # Only the first element of the returned pair is needed here.
            parsed, _unused = opcodeUtils.get(asm_path)
            sequence = opcodeUtils.opcodeSeq(parsed)
            writeFile(target, delimeter.join(sequence))
Exemplo n.º 4
0
def asmbuilding(dir, disName, disassembler, isWriting=True):
    """
    Extract assembly from the ``.exe`` files in ``dir`` using the specified
    disassembler.

    :param dir: directory whose ``.exe`` files are disassembled
    :param disName: disassembler name; forwarded to ``fu.fileNameSettings``
        and shown in the progress message
    :param disassembler: object providing ``getDisassembledCode(path)``
    :param isWriting: when true, the disassembled code is written to the
        ``.asm`` target path; when false the file is still disassembled but
        nothing is written
    :return: None
    """
    for exe_path in fu.getFilePaths(dir, [".exe"]):
        dirname, filename, asm_path = fu.fileNameSettings(
            exe_path, ".asm", disName)
        listing = disassembler.getDisassembledCode(exe_path)
        if isWriting:
            writeFile(asm_path, listing)
        # Progress message; the labels are Turkish ("klasor" = folder,
        # "dosyadi" = file name) and are kept as emitted.
        progress = ("klasor:" + dirname + " dosyadi:" + filename +
                    " disassembler:" + disName)
        print(progress)
Exemplo n.º 5
0
def make(dirname, apply, resultfile, cv=10):
    """
    Run ``apply`` in parallel on every ``.csv`` file under ``dirname`` and
    report the combined results.

    One joblib job is scheduled per CSV file, using as many workers as the
    machine has CPU cores. The list of CSV paths is appended to the list of
    results as its last element; the whole list is printed and then passed
    to ``report``.

    :param dirname: directory searched for ``.csv`` files (presumably one
        CSV per disassembly result)
    :param apply: callable invoked as ``apply(csv_path, cv)``
    :param resultfile: forwarded to ``report(results, resultfile)``
    :param cv: cross validation number handed to ``apply``
    :return: None
    """
    from joblib import Parallel, delayed
    import multiprocessing

    csv_paths = fu.getFilePaths(dirname, extensionList=[".csv"])
    worker_count = multiprocessing.cpu_count()
    runner = Parallel(n_jobs=worker_count)
    results = runner(delayed(apply)(csv_path, cv) for csv_path in csv_paths)
    # Keep the input paths with the results so the report can refer to them.
    results.append(csv_paths)
    print(results)
    report(results, resultfile)
Exemplo n.º 6
0
def core_process(process_function,
                 output_ext,
                 delimeter=',',
                 dataset='dataset',
                 extensionList=['.exe', '.dll'],
                 feature_type="text"):
    """
    Run a feature extractor over every matching file of a dataset directory
    and write each file's result to its own output file.

    Attention ! Assume that filename is file hash value or unique value for
    the file.

    :param process_function: callable invoked as
        ``process_function(path, delimeter=delimeter)``; a None result means
        nothing is written for that file
    :type process_function: callable
    :param output_ext: forwarded, together with the source path, to the
        ``output`` writer functions
    :param delimeter: delimiter forwarded to ``process_function``
    :type delimeter: str
    :param dataset: path of the dataset; only directories are processed, a
        regular file is silently ignored
    :type dataset: str
    :param extensionList: file extensions handed to
        ``fileutil.getFilePaths`` to select the files to process
    :type extensionList: list
    :param feature_type: ``"seq"`` selects ``output.writeFeatureAsSequence``;
        any other value selects ``output.writeIntoFile``
    :type feature_type: str
    :return: None
    """
    # A single regular file is accepted but nothing is done with it.
    if os.path.isfile(dataset):
        return

    if not os.path.isdir(dataset):
        print('File type must be file or directory.')
        return

    sample_paths = fileutil.getFilePaths(dataset, extensionList)
    for position, sample_path in enumerate(sample_paths):
        features = process_function(sample_path, delimeter=delimeter)
        if features is not None:
            try:
                if feature_type == "seq":
                    output.writeFeatureAsSequence(sample_path, output_ext,
                                                  features)
                else:
                    output.writeIntoFile(sample_path, output_ext, features)
            except IOError as ioe:
                print(str(ioe))
                pause()
        # Progress line: "<index>  -  <total>", printed for every file.
        print(str(position) + "  -  " + str(len(sample_paths)))