args = argparser.parse_args()

# Validate the combination of command-line options before doing any work.
assert args.train != '' or args.resume_training != '', \
    "Must provide training data or a model"
# Evaluating is only allowed when a model to resume from was given.
assert not args.evaluate or args.resume_training, \
    "provide a model with --resume"
# When not evaluating, both training and validation data must be given.
assert args.evaluate or not (args.train == '' or args.valid == ''), \
    "use --train and --valid for training"
assert args.batch_size % 2 == 0, "use a multiple of 2 for batch_size"

# If no keep probabilities were supplied, fall back to 1.0 for each of the
# len(args.hiddens) + 1 entries.
if not args.keep_probs:
    args.keep_probs = np.ones(len(args.hiddens) + 1)
assert len(args.hiddens) + 1 == len(args.keep_probs)

# Record the exact invocation and the output of git_log().
print(' '.join(sys.argv))
print(git_log())

# Seed NumPy, the stdlib `random` module and TensorFlow from args.seed.
np.random.seed(args.seed)
random.seed(args.seed)
tf.set_random_seed(args.seed)

# Print arrays with 3 digits of precision and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

# Create the output directories that do not exist yet.
required_folders = ['log', 'summary', 'model']
for folder in required_folders:
    if os.path.exists(folder):
        continue
    os.makedirs(folder)

# load data
feat_idx = np.arange(11)
# Feature width: ip -> 8*8, port -> 2*16, protocol -> 1
args.n_feat = 8 * 8 + 2 * 16 + 1
if __name__ == '__main__':
    # ---- command-line interface -------------------------------------------
    argparser = argparse.ArgumentParser(sys.argv[0])
    argparser.add_argument(
        "--data", type=str, nargs='*',
        help="list of input .npy data", required=True)
    argparser.add_argument(
        "--save", type=str,
        help="prefix to save the results", required=True)
    argparser.add_argument(
        "--seed", type=int,
        help="random state for sklearn", default=69)
    argparser.add_argument(
        "--n_hashes_list", type=int, nargs='*',
        help="number of hashes", required=True)
    argparser.add_argument(
        "--space_list", type=float, nargs='*',
        help="space in MB", required=True)
    argparser.add_argument(
        "--n_workers", type=int,
        help="number of workers", default=10)
    argparser.add_argument("--aol_data", action='store_true', default=False)
    argparser.add_argument("--count_sketch", action='store_true', default=False)
    args = argparser.parse_args()

    # ---- run header: the invocation followed by the output of git_log() ----
    command = ' '.join(sys.argv) + '\n'
    log_str = command + git_log() + '\n'
    print(log_str)

    np.random.seed(args.seed)

    # ---- load data -----------------------------------------------------------
    if args.aol_data:
        # The AOL loader is given a single path, so exactly one is required.
        assert len(args.data) == 1
        x, y = get_data_aol_query(args.data[0])
    else:
        x, y = get_data_str_with_ports_list(args.data)
    get_stat(args.data, x, y)

    # ---- output location: param_results/<sketch name>/ ----------------------
    name = 'count_sketch' if args.count_sketch else 'count_min'
    # The trailing '' makes the joined path end with a path separator.
    folder = os.path.join('param_results', name, '')