def bytes_2_grams(input_, output, limit):
    """Dump the two-gram rows of the ``.bytes`` entries as JSON lines.

    Every row produced by ``crunch(input_, '.bytes', two_grams, limit=limit)``
    is serialised with ``ujson.dumps`` and written to *output*, one row per
    line.

    Args:
        input_: Forwarded to ``crunch`` as its first argument (presumably the
            path of the archive to read -- confirm against ``crunch``).
        output: Path of the file to create or overwrite.
        limit: Forwarded to ``crunch`` as the ``limit`` keyword.
    """
    # A parenthesised, single-argument print emits the same text on
    # Python 2 and Python 3; the one-element tuple keeps %-formatting safe
    # even if the value itself is a tuple.
    print('Processing %s' % (input_,))
    with open(output, 'w') as f:
        for row in crunch(input_, '.bytes', two_grams, limit=limit):
            f.write(ujson.dumps(row) + '\n')
    print('Saved to %s' % (output,))
def bytes_1_grams(input_, output, limit):
    """Dump the one-gram rows of the ``.bytes`` entries as a CSV file.

    All rows produced by ``crunch(input_, '.bytes', one_grams, limit=limit)``
    are collected into a ``pandas.DataFrame`` and written to *output* with
    ``DataFrame.to_csv``.

    Args:
        input_: Forwarded to ``crunch`` as its first argument (presumably the
            path of the archive to read -- confirm against ``crunch``).
        output: Destination handed to ``DataFrame.to_csv``.
        limit: Forwarded to ``crunch`` as the ``limit`` keyword.
    """
    # A parenthesised, single-argument print emits the same text on
    # Python 2 and Python 3; the one-element tuple keeps %-formatting safe
    # even if the value itself is a tuple.
    print('Processing %s' % (input_,))
    rows = list(crunch(input_, '.bytes', one_grams, limit=limit))
    pd.DataFrame(rows).to_csv(output)
    print('Saved to %s' % (output,))
def bytes_2_grams(input_, output, limit):
    """Write two-gram rows for the ``.bytes`` entries as line-delimited JSON.

    Iterates ``crunch(input_, ".bytes", two_grams, limit=limit)`` and writes
    each row to *output* as ``ujson.dumps(row)`` followed by a newline.

    Args:
        input_: First argument given to ``crunch`` (presumably the archive
            path -- confirm against ``crunch``).
        output: Path of the file to create or overwrite.
        limit: Passed to ``crunch`` as the ``limit`` keyword.
    """
    # Single-argument print calls behave identically on Python 2 and 3;
    # wrapping the value in a 1-tuple guards the %-format against tuples.
    print("Processing %s" % (input_,))
    with open(output, "w") as f:
        for row in crunch(input_, ".bytes", two_grams, limit=limit):
            f.write(ujson.dumps(row) + "\n")
    print("Saved to %s" % (output,))
def bytes_1_grams(input_, output, limit):
    """Write one-gram rows for the ``.bytes`` entries as CSV.

    Collects everything yielded by
    ``crunch(input_, ".bytes", one_grams, limit=limit)`` into a
    ``pandas.DataFrame`` and saves it with ``DataFrame.to_csv(output)``.

    Args:
        input_: First argument given to ``crunch`` (presumably the archive
            path -- confirm against ``crunch``).
        output: Destination handed to ``DataFrame.to_csv``.
        limit: Passed to ``crunch`` as the ``limit`` keyword.
    """
    # Single-argument print calls behave identically on Python 2 and 3;
    # wrapping the value in a 1-tuple guards the %-format against tuples.
    print("Processing %s" % (input_,))
    rows = list(crunch(input_, ".bytes", one_grams, limit=limit))
    pd.DataFrame(rows).to_csv(output)
    print("Saved to %s" % (output,))
import sys

import numpy as np

import pe_parser
from sevenz_cruncher import crunch
import config


def process_asm(key, lines, folder):
    """Parse one ``.asm`` entry and save its features as a compressed archive.

    Args:
        key: Identifier of the entry; substituted into *folder* to build the
            output path.
        lines: Passed unchanged to ``pe_parser.parse`` (presumably the lines
            of the ``.asm`` entry -- confirm against ``crunch``).
        folder: A %-format template for the output path (for example
            ``'some/dir/%s'``), not a plain directory name.

    Returns:
        Always ``1``.
    """
    features = pe_parser.parse(lines)
    # ``folder`` is a template: ``folder % key`` yields the target file name.
    np.savez_compressed(folder % key, features)
    return 1


if __name__ == '__main__':
    # Exit with a usage hint instead of a bare IndexError when the
    # environment name is missing from the command line.
    if len(sys.argv) < 2:
        sys.exit('usage: %s <env>' % sys.argv[0])

    env = sys.argv[1]
    settings = config.conf[env]
    folder = config.locate_file(env, settings['asm_folder'])
    limit = settings.get('limit', None)
    path_template = folder + '/%s'

    # NOTE(review): the value returned by crunch() is discarded here; if
    # crunch is a lazy generator, nothing would be processed -- confirm.
    crunch(
        settings['input'],
        '.asm',
        lambda key, lines: process_asm(key, lines, path_template),
        limit=limit,
    )