def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    shutil.copyfile(os.path.expanduser(os.path.expandvars(config.get('cache', 'category'))), os.path.join(cache_dir, 'category'))
    category = utils.get_category(config)
    category_index = dict([(name, i) for i, name in enumerate(category)])
    datasets = config.get('cache', 'datasets').split()
    for phase in args.phase:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = []
        for dataset in datasets:
            logging.info('load %s dataset' % dataset)
            module, func = dataset.rsplit('.', 1)
            module = importlib.import_module(module)
            func = getattr(module, func)
            data += func(config, path, category_index)
        if config.getboolean('cache', 'shuffle'):
            random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('%s data are saved into %s' % (str(args.phase), cache_dir))
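# A minimal sketch (not part of the original script) of how one of the cache files
# written above could be read back for inspection. The structure of each entry is
# produced by the dataset loader functions listed in the [cache] datasets option,
# so only the outer pickled list is assumed here.
import os
import pickle


def load_cache(cache_dir, phase):
    # the script above stores one pickled list per phase, e.g. <cache_dir>/train.pkl
    path = os.path.join(cache_dir, phase) + '.pkl'
    with open(path, 'rb') as f:
        data = pickle.load(f)
    print('%s: %d examples' % (path, len(data)))
    return data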
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    torch.manual_seed(args.seed)
    model_dir = utils.get_model_dir(config)
    init_net = caffe2_pb2.NetDef()
    with open(os.path.join(model_dir, 'init_net.pb'), 'rb') as f:
        init_net.ParseFromString(f.read())
    predict_net = caffe2_pb2.NetDef()
    with open(os.path.join(model_dir, 'predict_net.pb'), 'rb') as f:
        predict_net.ParseFromString(f.read())
    p = workspace.Predictor(init_net, predict_net)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    tensor = torch.randn(1, 3, height, width)
    # Checksum
    output = p.run([tensor.numpy()])
    for key, a in [
        ('tensor', tensor.cpu().numpy()),
        ('output', output[0]),
    ]:
        print('\t'.join(map(str, [key, a.shape, utils.abs_mean(a), hashlib.md5(a.tostring()).hexdigest()])))
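# The checksum lines above and below rely on utils.abs_mean, which is not shown in
# these scripts. A plausible stand-in (an assumption, not the project's actual helper)
# is the mean absolute value of the array; combined with an MD5 digest it gives a
# cheap fingerprint for comparing outputs across runtimes.
import hashlib

import numpy as np


def abs_mean(a):
    # mean of absolute values: a simple scale fingerprint of a tensor
    return np.abs(a).mean()


def checksum(key, a):
    # same tab-separated format as the prints above; tobytes() is the
    # non-deprecated spelling of the tostring() call used in the scripts
    return '\t'.join(map(str, [key, a.shape, abs_mean(a), hashlib.md5(a.tobytes()).hexdigest()]))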
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    cache_dir = utils.get_cache_dir(config)
    os.makedirs(cache_dir, exist_ok=True)
    mappers, _ = utils.get_dataset_mappers(config)
    for phase in args.phase:
        path = os.path.join(cache_dir, phase) + '.pkl'
        logging.info('save cache file: ' + path)
        data = []
        for dataset in mappers:
            logging.info('load %s dataset' % dataset)
            module, func = dataset.rsplit('.', 1)
            module = importlib.import_module(module)
            func = getattr(module, func)
            data += func(config, path, mappers[dataset])
        if config.getboolean('cache', 'shuffle'):
            random.shuffle(data)
        with open(path, 'wb') as f:
            pickle.dump(data, f)
    logging.info('%s data are saved into %s' % (str(args.phase), cache_dir))
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    resize = transform.parse_transform(config, config.get('transform', 'resize_test'))
    transform_image = transform.get_transform(config, config.get('transform', 'image_test').split())
    transform_tensor = transform.get_transform(config, config.get('transform', 'tensor').split())
    # load image
    image_bgr = cv2.imread('image.jpg')
    image_resized = resize(image_bgr, height, width)
    image = transform_image(image_resized)
    tensor = transform_tensor(image).unsqueeze(0)
    # Caffe2
    init_net = caffe2_pb2.NetDef()
    with open(os.path.join(model_dir, 'init_net.pb'), 'rb') as f:
        init_net.ParseFromString(f.read())
    predict_net = caffe2_pb2.NetDef()
    with open(os.path.join(model_dir, 'predict_net.pb'), 'rb') as f:
        predict_net.ParseFromString(f.read())
    p = workspace.Predictor(init_net, predict_net)
    results = p.run([tensor.numpy()])
    logging.info(utils.abs_mean(results[0]))
    logging.info(hashlib.md5(results[0].tostring()).hexdigest())
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    mapper = [(inflection.underscore(name), member()) for name, member in inspect.getmembers(importlib.machinery.SourceFileLoader('', __file__).load_module()) if inspect.isclass(member)]
    path = os.path.join(model_dir, os.path.basename(os.path.splitext(__file__)[0])) + '.xlsx'
    with xlsxwriter.Workbook(path, {'strings_to_urls': False, 'nan_inf_to_errors': True}) as workbook:
        worksheet = workbook.add_worksheet(args.worksheet)
        for j, (key, m) in enumerate(mapper):
            worksheet.write(0, j, key)
            for i, (name, variable) in enumerate(state_dict.items()):
                value = m(name, variable)
                worksheet.write(1 + i, j, value)
                if hasattr(m, 'format'):
                    m.format(workbook, worksheet, i, j)
        worksheet.autofilter(0, 0, i, len(mapper) - 1)
        worksheet.freeze_panes(1, 0)
    logging.info(path)
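# The xlsx dump above builds one column per class defined in the same file: each class
# is instantiated once, called as m(name, variable) for every state_dict entry, and may
# expose an optional format(workbook, worksheet, i, j) hook. A hypothetical column
# satisfying that contract (the real column classes are not shown here):
class Shape:
    def __call__(self, name, variable):
        # one spreadsheet cell per tensor: its size rendered as e.g. "64x3x3x3"
        return 'x'.join(map(str, variable.size()))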
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    cache_dir = utils.get_cache_dir(config)
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(config, cache_dir if os.path.exists(cache_dir) else None)
    anchors = utils.get_anchors(config)
    anchors = torch.from_numpy(anchors).contiguous()
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(config, state_dict), anchors, len(category))
    dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    resize = transform.parse_transform(config, config.get('transform', 'resize_test'))
    transform_image = transform.get_transform(config, config.get('transform', 'image_test').split())
    transform_tensor = transform.get_transform(config, config.get('transform', 'tensor').split())
    # load image
    image_bgr = cv2.imread('image.jpg')
    image_resized = resize(image_bgr, height, width)
    image = transform_image(image_resized)
    tensor = transform_tensor(image).unsqueeze(0)
    # Checksum
    for key, var in dnn.state_dict().items():
        a = var.cpu().numpy()
        print('\t'.join(map(str, [key, a.shape, utils.abs_mean(a), hashlib.md5(a.tostring()).hexdigest()])))
    output = dnn(torch.autograd.Variable(tensor, volatile=True)).data
    for key, a in [
        ('image_bgr', image_bgr),
        ('image_resized', image_resized),
        ('tensor', tensor.cpu().numpy()),
        ('output', output.cpu().numpy()),
    ]:
        print('\t'.join(map(str, [key, a.shape, utils.abs_mean(a), hashlib.md5(a.tostring()).hexdigest()])))
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    init_net = onnx_caffe2.helper.load_caffe2_net(os.path.join(model_dir, 'init_net.pb'))
    predict_net = onnx_caffe2.helper.load_caffe2_net(os.path.join(model_dir, 'predict_net.pb'))
    benchmark = onnx_caffe2.helper.benchmark_caffe2_model(init_net, predict_net)
    logging.info('benchmark=%f(milliseconds)' % benchmark)
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(config)
    anchors = torch.from_numpy(utils.get_anchors(config)).contiguous()
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    _model = utils.parse_attr(config.get('model', 'dnn'))
    dnn = _model(model.ConfigChannels(config, state_dict), anchors, len(category))
    logging.info(humanize.naturalsize(sum(var.cpu().numpy().nbytes for var in dnn.state_dict().values())))
    dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    image = torch.autograd.Variable(torch.randn(args.batch_size, 3, height, width))
    output = dnn(image)
    state_dict = dnn.state_dict()
    # score the channels of the target weight and select a fraction of them by rank
    d = utils.dense(state_dict[args.name])
    keep = torch.LongTensor(np.argsort(d)[:int(len(d) * args.keep)])
    # rewrite every variable that depends on the selected channels by walking the autograd graph
    modifier = utils.channel.Modifier(
        args.name, state_dict, dnn,
        lambda name, var: var[keep],
        lambda name, var, mapper: var[mapper(keep, len(d))],
        debug=args.debug,
    )
    modifier(output.grad_fn)
    if args.debug:
        path = modifier.dot.view('%s.%s.gv' % (os.path.basename(model_dir), os.path.basename(os.path.splitext(__file__)[0])), os.path.dirname(model_dir))
        logging.info(path)
    assert len(keep) == len(state_dict[args.name])
    # rebuild the model from the modified state_dict to verify it still runs
    dnn = _model(model.ConfigChannels(config, state_dict), anchors, len(category))
    dnn.load_state_dict(state_dict)
    dnn(image)
    if not args.debug:
        torch.save(state_dict, path)
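# utils.dense is used above (and in the pruning script further down) to produce one score
# per output channel of the named weight, so channels can be ranked with np.argsort. Its
# implementation is not shown; a simple stand-in (an assumption, not the project's code)
# scores each output channel by the mean absolute value of its weights:
import numpy as np


def dense(var):
    # var: a weight tensor whose first dimension indexes output channels,
    # e.g. a conv weight shaped (out_channels, in_channels, kh, kw)
    a = var.cpu().numpy()
    return np.abs(a.reshape(a.shape[0], -1)).mean(-1)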
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    model = onnx.load(model_dir + '.onnx')
    onnx.checker.check_model(model)
    init_net, predict_net = onnx_caffe2.backend.Caffe2Backend.onnx_graph_to_caffe2_net(model.graph, device='CPU')
    onnx_caffe2.helper.save_caffe2_net(init_net, os.path.join(model_dir, 'init_net.pb'))
    onnx_caffe2.helper.save_caffe2_net(predict_net, os.path.join(model_dir, 'predict_net.pb'), output_txt=True)
    logging.info(model_dir)
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    if args.level:
        logging.getLogger().setLevel(args.level.upper())
    model_dir = utils.get_model_dir(config)
    init_net = onnx_caffe2.helper.load_caffe2_net(os.path.join(model_dir, 'init_net.pb'))
    predict_net = onnx_caffe2.helper.load_caffe2_net(os.path.join(model_dir, 'predict_net.pb'))
    benchmark = onnx_caffe2.helper.benchmark_caffe2_model(init_net, predict_net)
    logging.info('benchmark=%f(milliseconds)' % benchmark)
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    if args.level:
        logging.getLogger().setLevel(args.level.upper())
    model_dir = utils.get_model_dir(config)
    model = onnx.load(model_dir + '.onnx')
    onnx.checker.check_model(model)
    init_net, predict_net = onnx_caffe2.backend.Caffe2Backend.onnx_graph_to_caffe2_net(model.graph, device='CPU')
    onnx_caffe2.helper.save_caffe2_net(init_net, os.path.join(model_dir, 'init_net.pb'))
    onnx_caffe2.helper.save_caffe2_net(predict_net, os.path.join(model_dir, 'predict_net.pb'), output_txt=True)
    logging.info(model_dir)
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    if args.level:
        logging.getLogger().setLevel(args.level.upper())
    model_dir = utils.get_model_dir(config)
    checkpoint, step, epoch = utils.train.load_model(model_dir)
    state_dict = checkpoint['dnn']
    sig = inspect.signature(size)
    mapper = utils.load_functions(__file__)
    mapper = [(key, fn) for key, fn in mapper if inspect.signature(fn).parameters == sig.parameters]
    if not args.nohead:
        print('\t'.join(map(operator.itemgetter(0), mapper)))
    for name, variable in state_dict.items():
        row = (fn(name, variable) for key, fn in mapper)
        print('\t'.join(map(str, row)))
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    path, step, epoch = utils.train.load_model(model_dir)
    checkpoint = torch.load(path, map_location=lambda storage, loc: storage)
    state_dict = checkpoint['dnn']
    sig = inspect.signature(size)
    mapper = utils.load_functions(__file__)
    mapper = [(key, fn) for key, fn in mapper if inspect.signature(fn).parameters == sig.parameters]
    if not args.nohead:
        print('\t'.join(map(operator.itemgetter(0), mapper)))
    for name, variable in state_dict.items():
        row = (fn(name, variable) for key, fn in mapper)
        print('\t'.join(map(str, row)))
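# The two reports above keep only module-level functions whose signature matches that of
# size(name, variable); each surviving function produces one tab-separated column per
# state_dict entry. A plausible definition of such a column function (illustrative, not
# necessarily the project's own):
def size(name, variable):
    # render a tensor's shape, e.g. "64x3x3x3" for a conv weight
    return 'x'.join(map(str, variable.size()))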
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(config)
    anchors = torch.from_numpy(utils.get_anchors(config)).contiguous()
    try:
        path, step, epoch = utils.train.load_model(model_dir)
        state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    except (FileNotFoundError, ValueError):
        logging.warning('model cannot be loaded')
        state_dict = None
    dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(config, state_dict), anchors, len(category))
    logging.info(humanize.naturalsize(sum(var.cpu().numpy().nbytes for var in dnn.state_dict().values())))
    if state_dict is not None:
        dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    image = torch.autograd.Variable(torch.randn(args.batch_size, 3, height, width))
    output = dnn(image)
    state_dict = dnn.state_dict()
    graph = utils.visualize.Graph(config, state_dict)
    graph(output.grad_fn)
    diff = [key for key in state_dict if key not in graph.drawn]
    if diff:
        logging.warning('variables not shown: ' + str(diff))
    path = graph.dot.view(os.path.basename(model_dir) + '.gv', os.path.dirname(model_dir))
    logging.info(path)
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    model_dir = utils.get_model_dir(config)
    category = utils.get_category(config)
    anchors = torch.from_numpy(utils.get_anchors(config)).contiguous()
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    dnn = utils.parse_attr(config.get('model', 'dnn'))(model.ConfigChannels(config, state_dict), anchors, len(category))
    logging.info(humanize.naturalsize(sum(var.cpu().numpy().nbytes for var in dnn.state_dict().values())))
    dnn.load_state_dict(state_dict)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    image = torch.autograd.Variable(torch.randn(args.batch_size, 3, height, width))
    output = dnn(image)
    state_dict = dnn.state_dict()
    # walk the autograd graph from the output back to the named variable
    closure = utils.walk.Closure(args.name, state_dict, type(dnn).scope, args.debug)
    closure(output.grad_fn)
    # rank channels of the target weight by utils.dense scores and prune via the walked graph
    d = utils.dense(state_dict[args.name])
    channels = torch.LongTensor(np.argsort(d)[int(len(d) * args.remove):])
    utils.walk.prune(closure, channels)
    if args.debug:
        path = closure.dot.view(os.path.basename(model_dir) + '.gv', os.path.dirname(model_dir))
        logging.info(path)
    else:
        torch.save(state_dict, path)
def run(self, definition, run_type=None):
    if run_type == "performance":
        for op_type, op_value in definition.items():
            # run docker mode
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            if op_type == "insert":
                if not run_params:
                    logger.debug("No run params")
                    continue
                for index, param in enumerate(run_params):
                    logger.info("Definition param: %s" % str(param))
                    table_name = param["table_name"]
                    volume_name = param["db_path_prefix"]
                    print(table_name)
                    (data_type, table_size, index_file_size, dimension, metric_type) = parser.table_parser(table_name)
                    for k, v in param.items():
                        if k.startswith("server."):
                            # Update server config
                            utils.modify_config(k, v, type="server", db_slave=None)
                    container = utils.run_server(self.image, test_type="remote", volume_name=volume_name, db_slave=None)
                    time.sleep(2)
                    milvus = MilvusClient(table_name)
                    # Check has table or not
                    if milvus.exists_table():
                        milvus.delete()
                        time.sleep(10)
                    milvus.create_table(table_name, dimension, index_file_size, metric_type)
                    # debug
                    # milvus.create_index("ivf_sq8", 16384)
                    res = self.do_insert(milvus, table_name, data_type, dimension, table_size, param["ni_per"])
                    logger.info(res)
                    # wait for file merge
                    time.sleep(table_size * dimension / 5000000)
                    # Clear up
                    utils.remove_container(container)
            elif op_type == "query":
                for index, param in enumerate(run_params):
                    logger.info("Definition param: %s" % str(param))
                    table_name = param["dataset"]
                    volume_name = param["db_path_prefix"]
                    (data_type, table_size, index_file_size, dimension, metric_type) = parser.table_parser(table_name)
                    for k, v in param.items():
                        if k.startswith("server."):
                            utils.modify_config(k, v, type="server")
                    container = utils.run_server(self.image, test_type="remote", volume_name=volume_name, db_slave=None)
                    time.sleep(2)
                    milvus = MilvusClient(table_name)
                    logger.debug(milvus.show_tables())
                    # Check has table or not
                    if not milvus.exists_table():
                        logger.warning("Table %s not existed, continue exec next params ..." % table_name)
                        continue
                    # parse index info
                    index_types = param["index.index_types"]
                    nlists = param["index.nlists"]
                    # parse top-k, nq, nprobe
                    top_ks, nqs, nprobes = parser.search_params_parser(param)
                    for index_type in index_types:
                        for nlist in nlists:
                            result = milvus.describe_index()
                            logger.info(result)
                            # milvus.drop_index()
                            # milvus.create_index(index_type, nlist)
                            result = milvus.describe_index()
                            logger.info(result)
                            logger.info(milvus.count())
                            # preload index
                            milvus.preload_table()
                            logger.info("Start warm up query")
                            res = self.do_query(milvus, table_name, [1], [1], 1, 1)
                            logger.info("End warm up query")
                            # Run query test
                            for nprobe in nprobes:
                                logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s" % (index_type, nlist, metric_type, nprobe))
                                res = self.do_query(milvus, table_name, top_ks, nqs, nprobe, run_count)
                                headers = ["Nq/Top-k"]
                                headers.extend([str(top_k) for top_k in top_ks])
                                utils.print_table(headers, nqs, res)
                    utils.remove_container(container)
    elif run_type == "insert_performance":
        for op_type, op_value in definition.items():
            # run docker mode
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            if not run_params:
                logger.debug("No run params")
                continue
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["table_name"]
                volume_name = param["db_path_prefix"]
                print(table_name)
                (data_type, table_size, index_file_size, dimension, metric_type) = parser.table_parser(table_name)
                for k, v in param.items():
                    if k.startswith("server."):
                        # Update server config
                        utils.modify_config(k, v, type="server", db_slave=None)
                container = utils.run_server(self.image, test_type="remote", volume_name=volume_name, db_slave=None)
                time.sleep(2)
                milvus = MilvusClient(table_name)
                # Check has table or not
                if milvus.exists_table():
                    milvus.delete()
                    time.sleep(10)
                milvus.create_table(table_name, dimension, index_file_size, metric_type)
                # debug
                # milvus.create_index("ivf_sq8", 16384)
                res = self.do_insert(milvus, table_name, data_type, dimension, table_size, param["ni_per"])
                logger.info(res)
                # wait for file merge
                time.sleep(table_size * dimension / 5000000)
                # Clear up
                utils.remove_container(container)
    elif run_type == "search_performance":
        for op_type, op_value in definition.items():
            # run docker mode
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["dataset"]
                volume_name = param["db_path_prefix"]
                (data_type, table_size, index_file_size, dimension, metric_type) = parser.table_parser(table_name)
                for k, v in param.items():
                    if k.startswith("server."):
                        utils.modify_config(k, v, type="server")
                container = utils.run_server(self.image, test_type="remote", volume_name=volume_name, db_slave=None)
                time.sleep(2)
                milvus = MilvusClient(table_name)
                logger.debug(milvus.show_tables())
                # Check has table or not
                if not milvus.exists_table():
                    logger.warning("Table %s not existed, continue exec next params ..." % table_name)
                    continue
                # parse index info
                index_types = param["index.index_types"]
                nlists = param["index.nlists"]
                # parse top-k, nq, nprobe
                top_ks, nqs, nprobes = parser.search_params_parser(param)
                for index_type in index_types:
                    for nlist in nlists:
                        result = milvus.describe_index()
                        logger.info(result)
                        # milvus.drop_index()
                        # milvus.create_index(index_type, nlist)
                        result = milvus.describe_index()
                        logger.info(result)
                        logger.info(milvus.count())
                        # preload index
                        milvus.preload_table()
                        logger.info("Start warm up query")
                        res = self.do_query(milvus, table_name, [1], [1], 1, 1)
                        logger.info("End warm up query")
                        # Run query test
                        for nprobe in nprobes:
                            logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s" % (index_type, nlist, metric_type, nprobe))
                            res = self.do_query(milvus, table_name, top_ks, nqs, nprobe, run_count)
                            headers = ["Nq/Top-k"]
                            headers.extend([str(top_k) for top_k in top_ks])
                            utils.print_table(headers, nqs, res)
                utils.remove_container(container)
    elif run_type == "accuracy":
        """
        Example definition:
        {
            "dataset": "random_50m_1024_512",
            "index.index_types": ["flat", "ivf_flat", "ivf_sq8"],
            "index.nlists": [16384],
            "nprobes": [1, 32, 128],
            "nqs": [100],
            "top_ks": [1, 64],
            "server.use_blas_threshold": 1100,
            "server.cpu_cache_capacity": 256
        }
        """
        for op_type, op_value in definition.items():
            if op_type != "query":
                logger.warning("invalid operation: %s in accuracy test, only support query operation" % op_type)
                break
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["dataset"]
                sift_acc = False
                if "sift_acc" in param:
                    sift_acc = param["sift_acc"]
                (data_type, table_size, index_file_size, dimension, metric_type) = parser.table_parser(table_name)
                for k, v in param.items():
                    if k.startswith("server."):
                        utils.modify_config(k, v, type="server")
                volume_name = param["db_path_prefix"]
                container = utils.run_server(self.image, test_type="remote", volume_name=volume_name, db_slave=None)
                time.sleep(2)
                milvus = MilvusClient(table_name)
                # Check has table or not
                if not milvus.exists_table():
                    logger.warning("Table %s not existed, continue exec next params ..." % table_name)
                    continue
                # parse index info
                index_types = param["index.index_types"]
                nlists = param["index.nlists"]
                # parse top-k, nq, nprobe
                top_ks, nqs, nprobes = parser.search_params_parser(param)
                if sift_acc is True:
                    # preload groundtruth data
                    true_ids_all = self.get_groundtruth_ids(table_size)
                acc_dict = {}
                for index_type in index_types:
                    for nlist in nlists:
                        result = milvus.describe_index()
                        logger.info(result)
                        milvus.create_index(index_type, nlist)
                        # preload index
                        milvus.preload_table()
                        # Run query test
                        for nprobe in nprobes:
                            logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s" % (index_type, nlist, metric_type, nprobe))
                            for top_k in top_ks:
                                for nq in nqs:
                                    result_ids = []
                                    id_prefix = "%s_index_%s_nlist_%s_metric_type_%s_nprobe_%s_top_k_%s_nq_%s" % \
                                        (table_name, index_type, nlist, metric_type, nprobe, top_k, nq)
                                    if sift_acc is False:
                                        self.do_query_acc(milvus, table_name, top_k, nq, nprobe, id_prefix)
                                        if index_type != "flat":
                                            # Compute accuracy
                                            base_name = "%s_index_flat_nlist_%s_metric_type_%s_nprobe_%s_top_k_%s_nq_%s" % \
                                                (table_name, nlist, metric_type, nprobe, top_k, nq)
                                            avg_acc = self.compute_accuracy(base_name, id_prefix)
                                            logger.info("Query: <%s> accuracy: %s" % (id_prefix, avg_acc))
                                    else:
                                        result_ids, result_distances = self.do_query_ids(milvus, table_name, top_k, nq, nprobe)
                                        debug_file_ids = "0.5.3_result_ids"
                                        debug_file_distances = "0.5.3_result_distances"
                                        with open(debug_file_ids, "w+") as fd:
                                            total = 0
                                            for index, item in enumerate(result_ids):
                                                true_item = true_ids_all[:nq, :top_k].tolist()[index]
                                                tmp = set(item).intersection(set(true_item))
                                                total = total + len(tmp)
                                                fd.write("query: N-%d, intersection: %d, total: %d\n" % (index, len(tmp), total))
                                                fd.write("%s\n" % str(item))
                                                fd.write("%s\n" % str(true_item))
                                        acc_value = self.get_recall_value(true_ids_all[:nq, :top_k].tolist(), result_ids)
                                        logger.info("Query: <%s> accuracy: %s" % (id_prefix, acc_value))
                # # print accuracy table
                # headers = [table_name]
                # headers.extend([str(top_k) for top_k in top_ks])
                # utils.print_table(headers, nqs, res)
                # remove container, and run next definition
                logger.info("remove container, and run next definition")
                utils.remove_container(container)
    elif run_type == "stability":
        for op_type, op_value in definition.items():
            if op_type != "query":
                logger.warning("invalid operation: %s in stability test, only support query operation" % op_type)
                break
            run_count = op_value["run_count"]
            run_params = op_value["params"]
            container = None
            for index, param in enumerate(run_params):
                logger.info("Definition param: %s" % str(param))
                table_name = param["dataset"]
                index_type = param["index_type"]
                volume_name = param["db_path_prefix"]
                (data_type, table_size, index_file_size, dimension, metric_type) = parser.table_parser(table_name)
                # set default test time
                if "during_time" not in param:
                    during_time = 100  # seconds
                else:
                    during_time = int(param["during_time"]) * 60
                # set default query process num
                if "query_process_num" not in param:
                    query_process_num = 10
                else:
                    query_process_num = int(param["query_process_num"])
                for k, v in param.items():
                    if k.startswith("server."):
                        utils.modify_config(k, v, type="server")
                container = utils.run_server(self.image, test_type="remote", volume_name=volume_name, db_slave=None)
                time.sleep(2)
                milvus = MilvusClient(table_name)
                # Check has table or not
                if not milvus.exists_table():
                    logger.warning("Table %s not existed, continue exec next params ..." % table_name)
                    continue
                start_time = time.time()
                insert_vectors = [[random.random() for _ in range(dimension)] for _ in range(10000)]
                i = 0
                while time.time() < start_time + during_time:
                    i = i + 1
                    processes = []
                    # do query
                    # for i in range(query_process_num):
                    #     milvus_instance = MilvusClient(table_name)
                    #     top_k = random.choice([x for x in range(1, 100)])
                    #     nq = random.choice([x for x in range(1, 100)])
                    #     nprobe = random.choice([x for x in range(1, 1000)])
                    #     # logger.info("index_type: %s, nlist: %s, metric_type: %s, nprobe: %s" % (index_type, nlist, metric_type, nprobe))
                    #     p = Process(target=self.do_query, args=(milvus_instance, table_name, [top_k], [nq], [nprobe], run_count, ))
                    #     processes.append(p)
                    #     p.start()
                    #     time.sleep(0.1)
                    # for p in processes:
                    #     p.join()
                    milvus_instance = MilvusClient(table_name)
                    top_ks = random.sample([x for x in range(1, 100)], 3)
                    nqs = random.sample([x for x in range(1, 1000)], 3)
                    nprobe = random.choice([x for x in range(1, 500)])
                    res = self.do_query(milvus, table_name, top_ks, nqs, nprobe, run_count)
                    if i % 10 == 0:
                        status, res = milvus_instance.insert(insert_vectors, ids=[x for x in range(len(insert_vectors))])
                        if not status.OK():
                            logger.error(status)
                        # status = milvus_instance.drop_index()
                        # if not status.OK():
                        #     logger.error(status)
                        # index_type = random.choice(["flat", "ivf_flat", "ivf_sq8"])
                        milvus_instance.create_index(index_type, 16384)
                        result = milvus.describe_index()
                        logger.info(result)
                        # milvus_instance.create_index("ivf_sq8", 16384)
                utils.remove_container(container)
    else:
        logger.warning("Run type: %s not supported" % run_type)
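# get_recall_value above compares the ids returned by a search against ground-truth ids.
# A common way to compute it (an assumed sketch, not necessarily the benchmark's own
# implementation) is the average per-query intersection ratio, matching the intersection
# counting written to the debug file in the accuracy branch:
def get_recall_value(true_ids, result_ids):
    # true_ids / result_ids: one list of ids per query, truncated to top_k
    sum_ratio = 0.0
    for true_item, item in zip(true_ids, result_ids):
        sum_ratio += len(set(item) & set(true_item)) / len(true_item)
    return round(sum_ratio / len(true_ids), 3)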
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    torch.manual_seed(args.seed)
    mapper = load_mapper(os.path.expandvars(os.path.expanduser(args.mapper)))
    model_dir = utils.get_model_dir(config)
    _, num_parts = utils.get_dataset_mappers(config)
    limbs_index = utils.get_limbs_index(config)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    tensor = torch.randn(args.batch_size, 3, height, width)
    # PyTorch
    try:
        path, step, epoch = utils.train.load_model(model_dir)
        state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    except (FileNotFoundError, ValueError):
        state_dict = {name: None for name in ('dnn', 'stages')}
    config_channels_dnn = model.ConfigChannels(config, state_dict['dnn'])
    dnn = utils.parse_attr(config.get('model', 'dnn'))(config_channels_dnn)
    config_channels_stages = model.ConfigChannels(config, state_dict['stages'], config_channels_dnn.channels)
    channel_dict = model.channel_dict(num_parts, len(limbs_index))
    stages = nn.Sequential(*[utils.parse_attr(s)(config_channels_stages, channel_dict, config_channels_dnn.channels, str(i)) for i, s in enumerate(config.get('model', 'stages').split())])
    inference = model.Inference(config, dnn, stages)
    inference.eval()
    state_dict = inference.state_dict()
    # TensorFlow
    with open(os.path.expanduser(os.path.expandvars(args.path)), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    image = ops.convert_to_tensor(np.transpose(tensor.cpu().numpy(), [0, 2, 3, 1]), name='image')
    tf.import_graph_def(graph_def, input_map={'image:0': image})
    saver = utils.train.Saver(model_dir, config.getint('save', 'keep'))
    with tf.Session(config=tf.ConfigProto(device_count={'CPU': 1, 'GPU': 0}, allow_soft_placement=True, log_device_placement=False)) as sess:
        try:
            for dst in state_dict:
                src, converter = mapper[dst]
                if src.isdigit():
                    state_dict[dst].fill_(float(src))
                else:
                    op = sess.graph.get_operation_by_name(src)
                    t = op.values()[0]
                    v = sess.run(t)
                    state_dict[dst] = torch.from_numpy(converter(v))
                val = state_dict[dst].numpy()
                print('\t'.join(list(map(str, (dst, src, val.shape, utils.abs_mean(val), hashlib.md5(val.tostring()).hexdigest())))))
            inference.load_state_dict(state_dict)
            if args.delete:
                logging.warning('delete model directory: ' + model_dir)
                shutil.rmtree(model_dir, ignore_errors=True)
            saver(dict(
                dnn=inference.dnn.state_dict(),
                stages=inference.stages.state_dict(),
            ), 0)
        finally:
            if args.debug:
                for op in sess.graph.get_operations():
                    if op.values():
                        logging.info(op.values()[0])
                for name in args.debug:
                    t = sess.graph.get_tensor_by_name(name + ':0')
                    val = sess.run(t)
                    val = np.transpose(val, [0, 3, 1, 2])
                    print('\t'.join(map(str, [
                        name,
                        'x'.join(map(str, val.shape)),
                        utils.abs_mean(val),
                        hashlib.md5(val.tostring()).hexdigest(),
                    ])))
                _tensor = torch.autograd.Variable(tensor, volatile=True)
                val = dnn(_tensor).data.numpy()
                print('\t'.join(map(str, [
                    'x'.join(map(str, val.shape)),
                    utils.abs_mean(val),
                    hashlib.md5(val.tostring()).hexdigest(),
                ])))
                for stage, output in enumerate(inference(_tensor)):
                    for name, feature in output.items():
                        val = feature.data.numpy()
                        print('\t'.join(map(str, [
                            'stage%d/%s' % (stage, name),
                            'x'.join(map(str, val.shape)),
                            utils.abs_mean(val),
                            hashlib.md5(val.tostring()).hexdigest(),
                        ])))
                forward = inference.forward
                inference.forward = lambda self, *x: list(forward(self, *x)[-1].values())
                with SummaryWriter(model_dir) as writer:
                    writer.add_graph(inference, (_tensor, ))
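# In the TensorFlow converter above, each PyTorch state_dict key maps to either a literal
# number (the src.isdigit() branch) or a TF operation name plus a converter applied to the
# fetched array. A typical converter (an assumed example, not the project's mapper file)
# permutes TF's HWIO convolution kernel layout into PyTorch's OIHW:
import numpy as np


def hwio_to_oihw(v):
    # TF conv kernels are (kh, kw, in_channels, out_channels); PyTorch expects (out, in, kh, kw)
    return np.ascontiguousarray(np.transpose(v, [3, 2, 0, 1]))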
def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.load(f))
    torch.manual_seed(args.seed)
    mapper = load_mapper(os.path.expandvars(os.path.expanduser(args.mapper)))
    model_dir = utils.get_model_dir(config)
    _, num_parts = utils.get_dataset_mappers(config)
    limbs_index = utils.get_limbs_index(config)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    tensor = torch.randn(args.batch_size, 3, height, width)
    # PyTorch
    try:
        path, step, epoch = utils.train.load_model(model_dir)
        state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    except (FileNotFoundError, ValueError):
        state_dict = {name: None for name in ('dnn', 'stages')}
    config_channels_dnn = model.ConfigChannels(config, state_dict['dnn'])
    dnn = utils.parse_attr(config.get('model', 'dnn'))(config_channels_dnn)
    config_channels_stages = model.ConfigChannels(config, state_dict['stages'], config_channels_dnn.channels)
    channel_dict = model.channel_dict(num_parts, len(limbs_index))
    stages = nn.Sequential(*[utils.parse_attr(s)(config_channels_stages, channel_dict, config_channels_dnn.channels, str(i)) for i, s in enumerate(config.get('model', 'stages').split())])
    inference = model.Inference(config, dnn, stages)
    inference.eval()
    state_dict = inference.state_dict()
    # Caffe
    net = caffe.Net(os.path.expanduser(os.path.expandvars(args.prototxt)), os.path.expanduser(os.path.expandvars(args.caffemodel)), caffe.TEST)
    if args.debug:
        logging.info('Caffe variables')
        for name, blobs in net.params.items():
            for i, blob in enumerate(blobs):
                val = blob.data
                print('\t'.join(map(str, [
                    '%s/%d' % (name, i),
                    'x'.join(map(str, val.shape)),
                    utils.abs_mean(val),
                    hashlib.md5(val.tostring()).hexdigest(),
                ])))
        logging.info('Caffe features')
        input = net.blobs[args.input]
        input.reshape(*tensor.size())
        input.data[...] = tensor.numpy()
        net.forward()
        for name, blob in net.blobs.items():
            val = blob.data
            print('\t'.join(map(str, [
                name,
                'x'.join(map(str, val.shape)),
                utils.abs_mean(val),
                hashlib.md5(val.tostring()).hexdigest(),
            ])))
    # convert
    saver = utils.train.Saver(model_dir, config.getint('save', 'keep'))
    try:
        for dst in state_dict:
            src, transform = mapper[dst]
            blobs = [b.data for b in net.params[src]]
            blob = transform(blobs)
            if isinstance(blob, np.ndarray):
                state_dict[dst] = torch.from_numpy(blob)
            else:
                state_dict[dst].fill_(blob)
            val = state_dict[dst].numpy()
            logging.info('\t'.join(list(map(str, (dst, src, val.shape, utils.abs_mean(val), hashlib.md5(val.tostring()).hexdigest())))))
        inference.load_state_dict(state_dict)
        if args.delete:
            logging.warning('delete model directory: ' + model_dir)
            shutil.rmtree(model_dir, ignore_errors=True)
        saver(dict(
            dnn=inference.dnn.state_dict(),
            stages=inference.stages.state_dict(),
        ), 0)
    finally:
        for stage, output in enumerate(inference(torch.autograd.Variable(tensor, volatile=True))):
            for name, feature in output.items():
                val = feature.data.numpy()
                print('\t'.join(map(str, [
                    'stage%d/%s' % (stage, name),
                    'x'.join(map(str, val.shape)),
                    utils.abs_mean(val),
                    hashlib.md5(val.tostring()).hexdigest(),
                ])))
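# load_mapper in the Caffe converter above returns, for every PyTorch state_dict key, a
# Caffe layer name and a transform applied to that layer's list of blobs (weights first,
# then bias). A hypothetical pair of entries (layer and parameter names invented purely
# for illustration) converting a convolution's weight and bias would look like:
example_mapper = {
    'dnn.conv1.weight': ('conv1_1', lambda blobs: blobs[0]),  # Caffe conv weights are already (out, in, kh, kw)
    'dnn.conv1.bias': ('conv1_1', lambda blobs: blobs[1]),
}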