def evaluate_lgb(lightgbm_model_file, results_dir, db_path=config.db_path,
                 remove_missing_features='scan'):
    """
    Take a trained lightGBM model and perform an evaluation on it. Results will be saved
    to results.csv in the path specified in results_dir

    :param lightgbm_model_file: Full path to the trained lightGBM model
    :param results_dir: The directory to which to write the 'results.csv' file;
        WARNING -- this will overwrite any existing results in that location
    :param db_path: the path to the directory containing the meta.db file;
        defaults to the value in config.py
    :param remove_missing_features: See help for remove_missing_features in
        train.py / train_network
    """
    os.system('mkdir -p {}'.format(results_dir))
    logger.info(f'Loading lgb model from {lightgbm_model_file}')
    model = lgb.Booster(model_file=lightgbm_model_file)
    generator = get_generator(mode='test',
                              path=db_path,
                              use_malicious_labels=True,
                              use_count_labels=False,
                              use_tag_labels=False,
                              return_shas=True,
                              remove_missing_features=remove_missing_features)
    logger.info('running lgb evaluation')
    f = open(os.path.join(results_dir, 'results.csv'), 'w')
    first_batch = True
    for shas, features, labels in tqdm.tqdm(generator):
        predictions = {'malware': model.predict(features)}
        results = normalize_results(labels, predictions,
                                    use_malware=True, use_count=False, use_tags=False)
        pd.DataFrame(results, index=shas).to_csv(f, header=first_batch)
        first_batch = False
    f.close()
    print('...done')

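# A minimal usage sketch for evaluate_lgb; the model and output paths below are
# hypothetical examples, not values taken from the original code:
#
#   evaluate_lgb('checkpoints/lightgbm.model', 'results/lgb_eval',
#                remove_missing_features='scan')
#
# This writes results/lgb_eval/results.csv with one row per sample, indexed by sha.
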
def process_line(line):
    if line.strip() == '':
        # Don't process empty lines any further
        if cCountEmptyLines:
            return "\\State", None, False, 0
        else:
            return "\\Statex", None, False, 0
    sp = line.split("#")
    comment = ""
    if len(sp) > 1:
        if len(sp[-2]) == 0 or not sp[-2][-1] == "\\":
            comment = sp[-1]
            line = "\\#".join(sp[:-1])
        else:
            if not len(sp[-2]) == 0:
                sp[-2] = sp[-2][:-1]
            line = "\\#".join(sp)
    comment = comment.strip()
    line = line.strip()
    line = preprocess(line)
    terminator = None
    process_lvl = False
    transform = 0
    if line == "":
        line = generate_comment_line(comment)
    else:
        keyword = get_keyword(line)
        generator = get_generator(keyword)
        line, terminator, process_lvl, transform = generator(line)
    if not comment == "":
        line += " \\Comment{\\ " + comment + "}"
    return line, terminator, process_lvl, transform  # Add generated line to result

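# A hedged illustration of the comment-splitting behaviour above. The exact LaTeX
# emitted for the code part depends on preprocess/get_keyword/get_generator, which
# are defined elsewhere; only the comment handling is shown:
#
#   process_line("x = x + 1  # increment")
#   # -> the trailing "# increment" is detected (it is not preceded by a "\"),
#   #    stripped from the line, and re-attached as " \Comment{\ increment}".
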
def dump_data_to_numpy(mode, output_file, workers=1, batchsize=1000,
                       remove_missing_features='scan'):
    """
    Produce numpy files required for training lightgbm model from SQLite + LMDB database.

    :param mode: One of 'train', 'validation', or 'test' representing which set of the
        data to process to file. Splits are obtained based on timestamps in config.py
    :param output_file: The name of the output file to produce for the indicated split.
    :param workers: How many worker processes to use (default 1)
    :param batchsize: The batch size to use in collecting samples (default 1000)
    :param remove_missing_features: How to check for and remove missing features;
        see README.md for recommendations (default 'scan')
    """
    _generator = get_generator(path=db_path,
                               mode=mode,
                               batch_size=batchsize,
                               use_malicious_labels=True,
                               use_count_labels=False,
                               use_tag_labels=False,
                               num_workers=workers,
                               remove_missing_features=remove_missing_features,
                               shuffle=False)
    feature_array = []
    label_array = []
    for i, (features, labels) in enumerate(_generator):
        feature_array.append(deepcopy(features.numpy()))
        label_array.append(deepcopy(labels['malware'].numpy()))
        sys.stdout.write(f"\r{i} / {len(_generator)}")
        sys.stdout.flush()
    np.savez(output_file, feature_array, label_array)
    print(f"\nWrote output to {output_file}")

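# A usage sketch producing one .npz file per split; the filenames are hypothetical:
#
#   dump_data_to_numpy('train', 'train_data.npz', workers=4)
#   dump_data_to_numpy('validation', 'validation_data.npz', workers=4)
#   dump_data_to_numpy('test', 'test_data.npz', workers=4)
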
def run(x_train, x_test, y_train_one_hot, y_test, model,
        fit_generator=False, epochs=8, steps_per_epoch=20):
    if fit_generator:
        generate = generators.get_generator(x_train, y_train_one_hot)
        model.fit_generator(generate(), steps_per_epoch=steps_per_epoch,
                            epochs=epochs, verbose=0)
    else:
        model.fit(x_train, y_train_one_hot, nb_epoch=epochs, batch_size=BATCH_SIZE)
    pred = model.predict_classes(x_test, 10)
    print(pred)
    print(y_test)
    # sklearn metrics take (y_true, y_pred); pass the ground truth first
    acc = metrics.accuracy_score(y_test, pred)
    f1 = metrics.f1_score(y_test, pred)
    print('acc: ', acc)
    print('f1: ', f1)
    return acc, f1

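# A usage sketch assuming `model` is a compiled Keras Sequential classifier exposing
# the old fit_generator/predict_classes API; the data variables are hypothetical
# placeholders:
#
#   acc, f1 = run(x_train, x_test, y_train_one_hot, y_test, model,
#                 fit_generator=True, epochs=8, steps_per_epoch=20)
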
def play(pathinfo):
    content, info = _parse_path(pathinfo)
    if not content:
        xbmcgui.Dialog().notification(L(34201), pathinfo['full path'])
        return
    singlevideo = pathinfo.get('singlevideo', False)
    try:
        showbusy = get_main_addon().getSetting('hidebusydialog') == 'false'
        get_player(get_generator(content, info, singlevideo), showbusy).run()
    except quickjson.JSONException as ex:
        # json_result['error']['code'] == -32602 is the best we get, invalid params
        if content == 'other' and ex.json_result.get('error', {}).get('code', 0) == -32602 \
                and not any(1 for source in quickjson.get_sources('video')
                            if info['path'].startswith(source['file'])):
            xbmcgui.Dialog().ok(L(ADD_SOURCE_HEADER),
                                L(ADD_SOURCE_MESSAGE).format(info['path']))
        else:
            raise

def __init__(self, params):
    super(Trainer, self).__init__()
    # save params
    self.params = params
    # set device
    self.device = torch.device(params['training']['device'])
    # set niter to -1
    self.niter = -1
    # set attribute for best score
    self.best_psnr = None
    # create generator
    self.netG = get_generator(self.params['generator'])
    print(self.netG)
    # move it to device
    self.netG.to(self.device)
    # define output filename
    self.name = get_generator_name(self.params['generator'])
    # define dirs
    self.base_dir = os.path.join('./checkpoints', self.params['exp_name'])
    self.model_dir = self.base_dir
    self.logs_dir = self.base_dir
    self.images_dir = self.base_dir
    self.out_dir = os.path.join(self.base_dir, 'regen')
    # create them
    if not os.path.isdir(self.base_dir):
        os.makedirs(self.base_dir)
    if not os.path.isdir(self.model_dir):
        os.makedirs(self.model_dir)
    if not os.path.isdir(self.logs_dir):
        os.makedirs(self.logs_dir)
    if not os.path.isdir(self.images_dir):
        os.makedirs(self.images_dir)
    if not os.path.isdir(self.out_dir):
        os.makedirs(self.out_dir)
    # if not training, do not continue
    if not self.training:
        return
    # get loss
    self.loss = get_loss(self.params['training']['loss'])
    # create generator optimizer
    self.optimG = torch.optim.Adam(
        self.netG.parameters(),
        lr=self.params['training']['lr'],
        weight_decay=self.params['training']['weight_decay'])
    # init weights
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

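# A minimal params dict covering the keys this constructor reads. Every value is an
# illustrative assumption, not a documented default; 'generator' must be whatever
# get_generator()/get_generator_name() expect:
#
#   params = {
#       'exp_name': 'my_experiment',
#       'generator': {'name': 'unet'},   # hypothetical generator spec
#       'training': {
#           'device': 'cuda:0',
#           'loss': 'l1',                # must be a key get_loss() accepts
#           'lr': 1e-4,
#           'weight_decay': 0.0,
#       },
#   }
#   trainer = Trainer(params)
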
def add_generator(id, name):
    s = system.get_system()
    gen_class = generators.get_generator(id)
    gen = gen_class()
    params = gen.get_parameters()
    if params is not None:
        user_params = {}
        print(f"{id} generator setup:")
        for param in params:
            user_in = input(f"{param} ({params[param]}) $ ")
            if len(user_in) == 0:
                # empty input keeps the default shown in the prompt
                user_params[param] = params[param]
            elif isinstance(params[param], str):
                user_params[param] = user_in
            else:
                # non-string parameters are evaluated as Python expressions
                user_val = eval(user_in)
                user_params[param] = user_val
        gen.set_parameters(user_params)
    s.add_generator(name, gen)
    print(f"Created gen {name}!")

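# A usage sketch; the generator id and instance name are hypothetical:
#
#   add_generator('noise', 'gen1')
#
# Each parameter is prompted as "name (default) $ "; an empty input keeps the
# default shown, and non-string inputs are eval()'d into Python values.
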
def evaluate_network(results_dir, checkpoint_file, db_path=config.db_path,
                     evaluate_malware=True, evaluate_count=True, evaluate_tags=True,
                     remove_missing_features='scan'):
    """
    Take a trained feedforward neural network model and output evaluation results to a
    csv in the specified location.

    :param results_dir: The directory to which to write the 'results.csv' file;
        WARNING -- this will overwrite any existing results in that location
    :param checkpoint_file: The checkpoint file containing the weights to evaluate
    :param db_path: the path to the directory containing the meta.db file;
        defaults to the value in config.py
    :param evaluate_malware: defaults to True; whether or not to record malware labels
        and predictions
    :param evaluate_count: defaults to True; whether or not to record count labels
        and predictions
    :param evaluate_tags: defaults to True; whether or not to record individual tag
        labels and predictions
    :param remove_missing_features: See help for remove_missing_features in
        train.py / train_network
    """
    os.system('mkdir -p {}'.format(results_dir))
    model = PENetwork(use_malware=True, use_counts=True, use_tags=True,
                      n_tags=len(Dataset.tags), feature_dimension=2381)
    model.load_state_dict(torch.load(checkpoint_file))
    model.to(device)
    generator = get_generator(mode='test',
                              path=db_path,
                              use_malicious_labels=evaluate_malware,
                              use_count_labels=evaluate_count,
                              use_tag_labels=evaluate_tags,
                              return_shas=True,
                              remove_missing_features=remove_missing_features)
    logger.info('...running network evaluation')
    f = open(os.path.join(results_dir, 'results.csv'), 'w')
    first_batch = True
    for shas, features, labels in tqdm.tqdm(generator):
        features = features.to(device)
        predictions = model(features)
        results = normalize_results(labels, predictions)
        pd.DataFrame(results, index=shas).to_csv(f, header=first_batch)
        first_batch = False
    f.close()
    print('...done')

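# A usage sketch for evaluate_network; the checkpoint and results paths are
# hypothetical examples:
#
#   evaluate_network('results/nn_eval', 'checkpoints/epoch_10.pt',
#                    evaluate_malware=True, evaluate_count=True, evaluate_tags=True)
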
def main(argv=None):
    tf.set_random_seed(1237)
    np.random.seed(1237)

    # Load data
    x_train, sorted_x_train = \
        utils.load_image_data(FLAGS.dataset, n_xl, n_channels, FLAGS.mbs)
    xshape = (-1, n_xl, n_xl, n_channels)
    print('Data shape = {}'.format(x_train.shape))
    x_train = x_train * 2 - 1
    sorted_x_train = sorted_x_train * 2 - 1

    # Make some data
    is_training = tf.placeholder_with_default(False, shape=[], name='is_training')
    generator = get_generator(FLAGS.dataset, FLAGS.arch,
                              n_code if FLAGS.arch == 'ae' else n_x,
                              n_xl, n_channels, n_z, ngf, is_training,
                              'transformation')
    if FLAGS.arch == 'adv':
        discriminator = get_discriminator(FLAGS.dataset, FLAGS.arch, n_x, n_xl,
                                          n_channels, n_f, ngf // 2, is_training)
        decoder = get_generator(FLAGS.dataset, FLAGS.arch, n_x, n_xl, n_channels,
                                n_f, ngf, is_training, 'decoder')

    # Define training/evaluation parameters
    run_name = 'results/{}_{}_{}_{}_c{}_mbs{}_bs{}_lr{}_t0{}'.format(
        FLAGS.dataset, FLAGS.arch, FLAGS.dist, FLAGS.match, n_code,
        FLAGS.mbs, FLAGS.bs, FLAGS.lr0, FLAGS.t0)
    if not os.path.exists(run_name):
        os.makedirs(run_name)

    # Build the computation graph
    if FLAGS.arch == 'ae':
        ae = ConvAE(x_train, (None, n_xl, n_xl, n_channels), ngf)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            ae.train(sess)
            x_code = ae.encode(x_train, sess)
            sorted_x_code = ae.encode(sorted_x_train, sess)
        model = MyPMD(x_code, sorted_x_code, xshape, generator, run_name, ae)
    elif FLAGS.arch == 'adv':
        model = MyPMD(x_train, sorted_x_train, xshape, generator, run_name,
                      F=discriminator, D=decoder)
    else:
        model = MyPMD(x_train, sorted_x_train, xshape, generator, run_name)

    # Run the inference
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if FLAGS.arch == 'ae':
            ae.train(sess)
        print('Training...')
        model.train(sess,
                    gen_dict={model.batch_size_ph: FLAGS.mbs,
                              is_training: False},
                    opt_dict={model.batch_size_ph: FLAGS.bs,
                              is_training: True},
                    iters=((x_train.shape[0] - 1) // FLAGS.mbs) + 1)