def bytes_2_grams(input_, output, limit):
    """Write the two-gram rows extracted from ``input_`` to ``output``.

    Each row obtained by iterating
    ``crunch(input_, '.bytes', two_grams, limit=limit)`` is serialised with
    ``ujson.dumps`` and written on its own line of ``output``.

    Args:
        input_: Source handed to ``crunch`` unchanged.
        output: Path of the file to create or overwrite.
        limit: Forwarded to ``crunch`` as its ``limit`` keyword.
    """
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('Processing %s' % (input_,))

    with open(output, 'w') as f:
        for row in crunch(input_, '.bytes', two_grams, limit=limit):
            f.write(ujson.dumps(row) + '\n')

    print('Saved to %s' % (output,))
def bytes_1_grams(input_, output, limit):
    """Write the one-gram rows extracted from ``input_`` to ``output`` as CSV.

    All rows obtained by iterating
    ``crunch(input_, '.bytes', one_grams, limit=limit)`` are collected in
    memory, wrapped in a ``pandas.DataFrame`` and saved with ``to_csv``.

    Args:
        input_: Source handed to ``crunch`` unchanged.
        output: Destination passed to ``DataFrame.to_csv``.
        limit: Forwarded to ``crunch`` as its ``limit`` keyword.
    """
    # A single pre-formatted argument prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('Processing %s' % (input_,))

    rows = list(crunch(input_, '.bytes', one_grams, limit=limit))

    pd.DataFrame(rows).to_csv(output)
    print('Saved to %s' % (output,))
def bytes_2_grams(input_, output, limit):
    """Dump the two-gram rows of ``input_`` to ``output``, one JSON value per line.

    The rows come from iterating
    ``crunch(input_, ".bytes", two_grams, limit=limit)``; each one is encoded
    with ``ujson.dumps`` and followed by a newline.

    Args:
        input_: Source handed to ``crunch`` unchanged.
        output: Path of the file to create or overwrite.
        limit: Forwarded to ``crunch`` as its ``limit`` keyword.
    """
    # Formatting the message first keeps the printed text identical whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print("Processing %s" % (input_,))

    with open(output, "w") as f:
        for row in crunch(input_, ".bytes", two_grams, limit=limit):
            f.write(ujson.dumps(row) + "\n")

    print("Saved to %s" % (output,))
def bytes_1_grams(input_, output, limit):
    """Save the one-gram rows of ``input_`` to ``output`` in CSV form.

    The rows come from iterating
    ``crunch(input_, ".bytes", one_grams, limit=limit)``; they are gathered
    into a list, turned into a ``pandas.DataFrame`` and written with
    ``to_csv``.

    Args:
        input_: Source handed to ``crunch`` unchanged.
        output: Destination passed to ``DataFrame.to_csv``.
        limit: Forwarded to ``crunch`` as its ``limit`` keyword.
    """
    # Formatting the message first keeps the printed text identical whether
    # ``print`` is the Python 2 statement or the Python 3 function.
    print("Processing %s" % (input_,))

    rows = list(crunch(input_, ".bytes", one_grams, limit=limit))

    pd.DataFrame(rows).to_csv(output)
    print("Saved to %s" % (output,))
Exemplo n.º 5
0
import numpy as np
import pe_parser
from sevenz_cruncher import crunch
import config
import sys


def process_asm(key, lines, folder):
    """Parse ``lines`` and store the result in a compressed ``.npz`` archive.

    Args:
        key: Value interpolated into ``folder`` to build the output path.
        lines: Input passed straight to ``pe_parser.parse``.
        folder: A ``%``-format pattern (not a plain directory name); the
            archive is written to ``folder % key``.

    Returns:
        Always ``1``.
    """
    parsed = pe_parser.parse(lines)
    destination = folder % key
    np.savez_compressed(destination, parsed)
    return 1


if __name__ == '__main__':
    # The first command-line argument selects which entry of config.conf
    # supplies the settings for this run.
    env = sys.argv[1]

    folder = config.locate_file(env, config.conf[env]['asm_folder'])
    limit = config.conf[env].get('limit', None)

    # process_asm expands its third argument with ``% key``, so a literal
    # '%' in the located folder must be doubled; otherwise it would be read
    # as a format directive and break (or corrupt) the output path.
    pattern = folder.replace('%', '%%') + '/%s'

    # NOTE(review): the value returned by crunch is discarded, so this relies
    # on crunch invoking the callback itself rather than lazily - confirm.
    crunch(config.conf[env]['input'],
           '.asm',
           lambda key, lines: process_asm(key, lines, pattern),
           limit=limit)
import numpy as np
import pe_parser
from sevenz_cruncher import crunch
import config
import sys

def process_asm(key, lines, folder):
    """Run ``pe_parser.parse`` on ``lines`` and save the features compressed.

    Args:
        key: Value substituted into ``folder`` to obtain the target path.
        lines: Input handed unchanged to ``pe_parser.parse``.
        folder: A ``%``-format pattern rather than a directory; the features
            are saved at ``folder % key`` via ``np.savez_compressed``.

    Returns:
        Always ``1``.
    """
    extracted = pe_parser.parse(lines)
    target = folder % key
    np.savez_compressed(target, extracted)
    return 1

if __name__ == '__main__':
    # The first command-line argument picks the config.conf entry to use.
    env = sys.argv[1]

    folder = config.locate_file(env, config.conf[env]['asm_folder'])
    limit = config.conf[env].get('limit', None)

    # process_asm applies ``% key`` to its third argument, so any literal '%'
    # in the located folder is doubled here to keep it from being treated as
    # a format directive when the output path is built.
    pattern = folder.replace('%', '%%') + '/%s'

    # NOTE(review): crunch's return value is ignored, which assumes crunch
    # calls the callback eagerly rather than returning a lazy iterator -
    # confirm.
    crunch(config.conf[env]['input'], '.asm',
           lambda key, lines: process_asm(key, lines, pattern),
           limit=limit)