def main():
    """Profile a ``RectNet`` instance with rasp and print its reports.

    Command-line flags:
        -d / --device   device ids handed to ``get_device`` (default "all")
        -v / --verbose  forwarded to rasp's ``profile.verbose`` setting
        -t / --timing   additionally run ``rasp.profile_timing_once``

    The compute profile is always collected; when ``--timing`` is given the
    stats returned by the timing pass replace it. The tape, full and per-node
    summaries are then printed in that order and profiling is switched off.
    """
    cli = argparse.ArgumentParser(description='profile torchvision models')
    cli.add_argument('-d', '--device', type=str, default="all",
                     help="device ids")
    cli.add_argument('-v', '--verbose', action='store_true',
                     help="verbose msg")
    cli.add_argument('-t', '--timing', action='store_true',
                     help="enable timing")
    args = cli.parse_args()

    # Only the first element returned by get_device is used here.
    device, _ = get_device(args.device)

    profile_cfg = {
        'batch_size': 1,
        'num_batches': 5,
        'warmup_batches': 5,
        'timing_max_depth': -1,
        'compute_max_depth': -1,
        'verbose': args.verbose,
    }
    rasp.set_config({'profile': profile_cfg})

    report_fields = [
        'name', 'type', 'in_shape', 'out_shape', 'params', 'lat', 'net_lat',
        'lat[%]', 'flops', 'mem_r', 'mem_w', 'mem_rw', 'dev_mem_alloc',
        'dev_mem_delta',
    ]

    # Positional arguments for RectNet; the first one is also used as the
    # channel dimension of the random input below.
    net_c, net_w, net_l = 16, 4, 8
    model = RectNet(net_c, net_w, net_l).to(device)

    batch, side = 128, 224
    inputs = torch.randn(batch, net_c, side, side).to(device)

    stats = rasp.profile_compute_once(model, inputs=inputs)
    if args.timing:
        stats = rasp.profile_timing_once(model, inputs=inputs)

    # Each summary function returns a pair; only the first item is printed.
    for summarize in (rasp.summary_tape, rasp.summary_all, rasp.summary_node):
        report, _ = summarize(stats, report_fields=report_fields)
        print(report)

    rasp.profile_off(model)
def main():
    """Profile every model named in ``model_names`` and print a result table.

    Command-line flags:
        -d / --device   device ids handed to ``get_device`` (default 'all')
        -v / --verbose  forwarded to rasp and also prints each full summary
        -t / --timing   enable timing in ``rasp.stat``; without it the
                        latency and FLOPS columns are reported as 0
        -i / --input    input shape as a Python tuple literal
                        (default '(1, 3, 224, 224)')

    Output is a pipe-separated table (header, ``---|---`` separator, then one
    row per model) with parameter count, FLOPs, latency and FLOPS taken from
    the last row of the data frame returned by ``rasp.stat``.
    """
    parser = argparse.ArgumentParser(description='profile torchvision models')
    parser.add_argument('-d', '--device', type=str, default='all',
                        help='device ids')
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='verbose msg')
    parser.add_argument('-t', '--timing', action='store_true',
                        help='enable timing')
    parser.add_argument('-i', '--input', type=str, default='(1, 3, 224, 224)',
                        help='input shape')
    args = parser.parse_args()

    # Only the first element returned by get_device is used here.
    device, _ = get_device(args.device)

    rasp.set_config({
        'profile': {
            'batch_size': 1,
            'num_batches': 50,
            'warmup_batches': 5,
            'timing_max_depth': -1,
            'compute_max_depth': -1,
            'verbose': args.verbose,
        },
    })

    print('%s | %s | %s | %s | %s' %
          ('Model', 'Params', 'FLOPs', 'latency', 'FLOPS'))
    print('---|---|---|---|---')

    fields = [
        'name', 'type', 'in_shape', 'out_shape', 'params', 'lat', 'net_lat',
        'lat[%]', 'flops', 'FLOPS', 'mem_rw', 'dev_mem_alloc', 'dev_mem_delta',
    ]

    # NOTE(review): eval() executes whatever expression is passed via --input.
    # That is acceptable for a locally run developer script, but if this ever
    # receives untrusted input it should be switched to ast.literal_eval.
    input_shape = tuple(eval(args.input))
    inputs = torch.randn(input_shape, device=device)

    for name in model_names:
        model = models.__dict__[name]().to(device=device)
        summary, df = rasp.stat(model,
                                inputs=inputs,
                                device=device,
                                report_fields=fields,
                                timing=args.timing,
                                print_stat=False,
                                returns='sum, data')
        if args.verbose:
            print(summary)

        # Last row of the report; presumably the aggregate over the model.
        total_f = df.tail(1)
        rasp.profile_off(model)

        # Use positional .iloc[0]: the single row left by tail(1) keeps its
        # original index label, so `series[0]` is either a label lookup that
        # can miss or a deprecated positional fallback.
        flops = total_f.flops.iloc[0]
        params = total_f.params.iloc[0]
        if args.timing:
            latency = total_f.lat.iloc[0]
            FLOPS = total_f.FLOPS.iloc[0]
        else:
            latency = 0
            FLOPS = 0

        print('%s | %s | %s | %s | %s' %
              (name, rasp.round_value(params), rasp.round_value(flops),
               rasp.round_value(latency), rasp.round_value(FLOPS)))
def main():
    """Profile every entry of ``ops`` and print a result table.

    Command-line flags:
        -d / --device   device ids handed to ``get_device`` (default "all")
        -v / --verbose  forwarded to rasp and also prints each full summary
        -t / --timing   enable timing in ``rasp.stat``
        -i / --input    input shape as a Python tuple literal
                        (default '(8, 64, 224, 224)')

    Each op is constructed as ``ops[name](chn_in, stride=1, affine=True)``
    where ``chn_in`` is the second element of the input shape. Output is a
    pipe-separated table (header, ``---|---`` separator, then one row per op)
    with parameter count, FLOPs and FLOPS taken from the last row of the data
    frame returned by ``rasp.stat``.
    """
    parser = argparse.ArgumentParser(description='profile torchvision models')
    parser.add_argument('-d', '--device', type=str, default="all",
                        help="device ids")
    parser.add_argument('-v', '--verbose', action='store_true',
                        help="verbose msg")
    parser.add_argument('-t', '--timing', action='store_true',
                        help="enable timing")
    parser.add_argument('-i', '--input', type=str, default='(8, 64, 224, 224)',
                        help='input shape')
    args = parser.parse_args()

    # Only the first element returned by get_device is used here.
    device, _ = get_device(args.device)

    rasp.set_config({
        'profile': {
            'batch_size': 1,
            'num_batches': 100,
            'warmup_batches': 10,
            'timing_max_depth': -1,
            'compute_max_depth': -1,
            'verbose': args.verbose,
        },
    })

    print("%s | %s | %s | %s" % ("Model", "Params", "FLOPs", "FLOPS"))
    print("---|---|---|---")

    fields = [
        'name', 'type', 'in_shape', 'out_shape', 'params', 'lat', 'net_lat',
        'lat[%]', 'flops', 'FLOPS', 'mem_r', 'mem_w', 'mem_rw',
        'dev_mem_alloc', 'dev_mem_delta',
    ]

    # NOTE(review): eval() executes whatever expression is passed via --input.
    # That is acceptable for a locally run developer script, but if this ever
    # receives untrusted input it should be switched to ast.literal_eval.
    input_shape = tuple(eval(args.input))
    chn_in = input_shape[1]
    inputs = torch.randn(input_shape, device=device)

    for name in ops:
        # NOTE(review): the op is not moved to `device` here, unlike `inputs`;
        # presumably rasp.stat(device=...) takes care of placement - confirm.
        model = ops[name](chn_in, stride=1, affine=True)
        summary, df = rasp.stat(model,
                                inputs=inputs,
                                device=device,
                                report_fields=fields,
                                timing=args.timing,
                                print_stat=False,
                                returns='sum, data')
        if args.verbose:
            print(summary)
        rasp.profile_off(model)

        # Last row of the report; presumably the aggregate over the op.
        total_f = df.tail(1)

        # Use positional .iloc[0]: the single row left by tail(1) keeps its
        # original index label, so `series[0]` is either a label lookup that
        # can miss or a deprecated positional fallback.
        total_flops = total_f.flops.iloc[0]
        total_params = total_f.params.iloc[0]
        # NOTE(review): FLOPS is read even when --timing is off; confirm that
        # rasp still populates this column in that case.
        total_FLOPS = total_f.FLOPS.iloc[0]

        print("%s | %s | %s | %s" %
              (name, rasp.round_value(total_params),
               rasp.round_value(total_flops), rasp.round_value(total_FLOPS)))