Example #1
	def test_example_use_with_downloading_a_page(self):
		CommunityFinder.init_session()
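		# a single shared HTTP session is reused by every CommunityFinder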

		tasks = []
		for url in URLS:
			community_finder = CommunityFinder(url)
			task = asyncio.ensure_future(community_finder.find_communities())
			tasks.append(task)

		loop = asyncio.get_event_loop()
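		# run all scheduled lookups concurrently to completion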
		res = loop.run_until_complete(asyncio.gather(*tasks))
		CommunityFinder.close_session()
		helpers.print_dict(res)
Example #2
	def test_on_coins(self):
		with open('coins.json') as f:
			projects = json.load(f)
		total_error = 0
		res = []
		for project in projects:
			r = {
				'id': project['id']
			}

			if 'community' in project:
				r['community'] = project['community']

			# use the finder without downloading a page:
			# feed it the raw project JSON instead
			c = CommunityFinder('')
			c.raw_page = json.dumps(project)
			c.find()
			# c.data now holds the detected communities

			r['finded'] = c.data['community']
			delta_community = 0
			delta_count = 0
			for x in r.get('community', {}):
				# medium_www and whitepaper are excluded from the error count
				if not (x == 'medium_www' or x == 'whitepaper'):
					if x not in r['finded']:
						delta_community += 1
					else:
						delta_count += abs(len(r['community'][x]) - len(r['finded'][x]))

			r['delta_community'] = delta_community
			r['delta_count'] = delta_count

			total_error += delta_count + delta_community
			res.append(r)

		print('Errors count:', total_error)
		print('Errors: ')
		helpers.print_dict([x for x in res if x['delta_community'] != 0 or x['delta_count'] != 0])
Example #3
def sendMessage(payload, mode):
    with socket(family=AF_INET, type=SOCK_STREAM) as sock:
        sock.settimeout(10)
        sock.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
        try:
            sock.connect((ip, port))

            msg = getHashedMessage(key, mode, "atm", payload)
            sock.sendall(bytes(json.dumps(msg, sort_keys=True), "utf-8"))
            received = sock.recv(1024).strip()

            try:
                response = json.loads(received.decode("utf-8"))
            except ValueError:
                sys.exit(63)

            # verify the response hash before trusting any field
            if isSameHash(key, dict(response)):
                sender = response["sender"]
                if sender != "bank":
                    sys.exit(63)

                if response["code"]:
                    if mode == "new":
                        print_dict(payload)
                        with open(card, "w") as pin:
                            pin.write(payload["account"])
                        sendAck()
                        sys.exit(0)
                    elif mode == "get":
                        sendAck()
                        print_dict({"balance": response["balance"],
                                    "account": payload["account"]})
                    else:
                        sendAck()
                        print_dict(payload)
                else:
                    sys.exit(255)
            else:
                sys.exit(63)
        except timeout:
            sys.exit(63)
Example #4
    def handle(self):
        self.data = None
        try:
            self.data = self.request.recv(1024).strip()
        except socket.timeout:
            print("protocol_error handle1", file=stderr)
            print("protocol_error", flush=True)
            return

        try:
            msg_in = json.loads(self.data.decode("utf-8"))
        except ValueError:
            print("protocol_error handle2", file=stderr)
            print("protocol_error", flush=True)
            return

        # verify the hash over the full message before trusting any field
        if isSameHash(key, dict(msg_in)):
            sender = msg_in.pop('sender')
            if sender != "atm":
                print("protocol_error handle3", file=stderr)
                print("protocol_error", flush=True)
                return

            msg_in.pop('hash')
            mode = msg_in.pop('mode')

            returnType = -1
            if mode == 'ack':
                returnType = -2
                print_dict(Bank.lastAction)
                Bank.lastAction = {}

            else:
                Bank.backupAccounts()
                if mode == "new":
                    returnType = Bank.addAccount(msg_in)
                elif mode == "deposit":
                    returnType = Bank.deposit(msg_in)
                elif mode == 'withdraw':
                    returnType = Bank.withdraw(msg_in)
                elif mode == 'get':
                    msg = Bank.get(msg_in)
                    if not msg:
                        returnType = 0
                    else:
                        msg_in['balance'] = msg[msg_in['account']]
                        returnType = 2

                if returnType == -1:
                    print("protocol_error handle4", file=stderr)
                    print("protocol_error", flush=True)
                elif returnType != -2:
                    if returnType == 1:
                        msg = getHashedMessage(key=key,
                                               mode="res",
                                               sender="bank",
                                               dict={"code": True})
                    elif returnType == 0:
                        msg = getHashedMessage(key=key,
                                               mode="res",
                                               sender="bank",
                                               dict={"code": False})
                    elif returnType == 2:
                        msg = getHashedMessage(key=key,
                                               mode="res",
                                               sender="bank",
                                               dict={"code": True,
                                                     "balance": msg_in['balance']})
                    self.wfile.write(
                        bytes(json.dumps(msg, sort_keys=True), "utf-8"))
                    if returnType > 0:
                        Bank.lastAction = msg_in
        else:
            print("protocol_error handle5", file=stderr)
            print("protocol_error", flush=True)
Example #5
def confirm():
    if Bank.lastAction:
        print_dict(Bank.lastAction)
        Bank.lastAction = {}
        Bank.lastAccounts = {}
Example #6
def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    assert (args.steps is None or args.steps > 5)
    print("CUDNN BENCHMARK ", args.cudnn_benchmark)
    assert (torch.cuda.is_available())

    if args.fp16:
        optim_level = Optimization.mxprO3
    else:
        optim_level = Optimization.mxprO0
    batch_size = args.batch_size

    jasper_model_definition = toml.load(args.model_toml)
    dataset_vocab = jasper_model_definition['labels']['labels']
    ctc_vocab = add_ctc_labels(dataset_vocab)

    val_manifest = args.val_manifest
    featurizer_config = jasper_model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration
    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to if args.pad_to >= 0 else "max"

    print('model_config')
    print_dict(jasper_model_definition)
    print('feature_config')
    print_dict(featurizer_config)

    data_layer = AudioToTextDataLayer(
        dataset_dir=args.dataset_dir,
        featurizer_config=featurizer_config,
        manifest_filepath=val_manifest,
        labels=dataset_vocab,
        batch_size=batch_size,
        pad_to_max=featurizer_config.get('pad_to') == "max",
        shuffle=False,
        multi_gpu=False)

    audio_preprocessor = AudioPreprocessing(**featurizer_config)

    encoderdecoder = JasperEncoderDecoder(
        jasper_model_definition=jasper_model_definition,
        feat_in=1024,
        num_classes=len(ctc_vocab))

    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        checkpoint = torch.load(args.ckpt, map_location="cpu")
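        # checkpoint keys carry an "audio_preprocessor." prefix; strip it so each module loads its own weights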
        for k in audio_preprocessor.state_dict().keys():
            checkpoint['state_dict'][k] = checkpoint['state_dict'].pop(
                "audio_preprocessor." + k)
        audio_preprocessor.load_state_dict(checkpoint['state_dict'],
                                           strict=False)
        encoderdecoder.load_state_dict(checkpoint['state_dict'], strict=False)

    greedy_decoder = GreedyCTCDecoder()

    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))

    N = len(data_layer)
    step_per_epoch = math.ceil(N / args.batch_size)

    print('-----------------')
    if args.steps is None:
        print('Have {0} examples to eval on.'.format(N))
        print('Have {0} steps / (gpu * epoch).'.format(step_per_epoch))
    else:
        print('Have {0} examples to eval on.'.format(args.steps *
                                                     args.batch_size))
        print('Have {0} steps / (gpu * epoch).'.format(args.steps))
    print('-----------------')

    audio_preprocessor.cuda()
    encoderdecoder.cuda()
    if args.fp16:
        encoderdecoder = amp.initialize(
            models=encoderdecoder, opt_level=AmpOptimizations[optim_level])

    eval(data_layer=data_layer,
         audio_processor=audio_preprocessor,
         encoderdecoder=encoderdecoder,
         greedy_decoder=greedy_decoder,
         labels=ctc_vocab,
         args=args)
Example #7
    print('-----------------')
    if args.steps is None:
        print('Have {0} examples to eval on.'.format(N))
        print('Have {0} steps / (gpu * epoch).'.format(step_per_epoch))
    else:
        print('Have {0} examples to eval on.'.format(args.steps *
                                                     args.batch_size))
        print('Have {0} steps / (gpu * epoch).'.format(args.steps))
    print('-----------------')

    audio_preprocessor.cuda()
    encoderdecoder.cuda()
    if args.fp16:
        encoderdecoder = amp.initialize(
            models=encoderdecoder, opt_level=AmpOptimizations[optim_level])

    eval(data_layer=data_layer,
         audio_processor=audio_preprocessor,
         encoderdecoder=encoderdecoder,
         greedy_decoder=greedy_decoder,
         labels=ctc_vocab,
         args=args)


if __name__ == "__main__":
    args = parse_args()

    print_dict(vars(args))

    main(args)
Example #8
def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    assert(torch.cuda.is_available())
    torch.backends.cudnn.benchmark = args.cudnn

    # set up distributed training
    if args.local_rank is not None:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')

    multi_gpu = torch.distributed.is_initialized()
    if multi_gpu:
        print_once("DISTRIBUTED TRAINING with {} gpus".format(torch.distributed.get_world_size()))

    # define amp optimization level
    if args.fp16:
        optim_level = Optimization.mxprO1
    else:
        optim_level = Optimization.mxprO0

    jasper_model_definition = toml.load(args.model_toml)
    dataset_vocab = jasper_model_definition['labels']['labels']
    ctc_vocab = add_ctc_labels(dataset_vocab)

    train_manifest = args.train_manifest 
    val_manifest = args.val_manifest 
    featurizer_config = jasper_model_definition['input']
    featurizer_config_eval = jasper_model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    featurizer_config_eval["optimization_level"] = optim_level

    sampler_type = featurizer_config.get("sampler", 'default')
    perturb_config = jasper_model_definition.get('perturb', None)
    if args.pad_to_max:
        assert(args.max_duration > 0)
        featurizer_config['max_duration'] = args.max_duration
        featurizer_config_eval['max_duration'] = args.max_duration
        featurizer_config['pad_to'] = "max"
        featurizer_config_eval['pad_to'] = "max"
    print_once('model_config')
    print_dict(jasper_model_definition)
         
    if args.gradient_accumulation_steps < 1:
        raise ValueError('Invalid gradient accumulation steps parameter {}'.format(args.gradient_accumulation_steps))
    if args.batch_size % args.gradient_accumulation_steps != 0:
        raise ValueError('batch size {} is not divisible by gradient accumulation steps {}'.format(args.batch_size, args.gradient_accumulation_steps))


    data_layer = AudioToTextDataLayer(
                                    dataset_dir=args.dataset_dir,
                                    featurizer_config=featurizer_config,
                                    perturb_config=perturb_config,
                                    manifest_filepath=train_manifest,
                                    labels=dataset_vocab,
                                    batch_size=args.batch_size // args.gradient_accumulation_steps,
                                    multi_gpu=multi_gpu,
                                    pad_to_max=args.pad_to_max,
                                    sampler=sampler_type)

    data_layer_eval = AudioToTextDataLayer(
                                    dataset_dir=args.dataset_dir,
                                    featurizer_config=featurizer_config_eval,
                                    manifest_filepath=val_manifest,
                                    labels=dataset_vocab,
                                    batch_size=args.batch_size,
                                    multi_gpu=multi_gpu,
                                    pad_to_max=args.pad_to_max
                                    )
 
    model = Jasper(feature_config=featurizer_config, jasper_model_definition=jasper_model_definition, feat_in=1024, num_classes=len(ctc_vocab))
 
    if args.ckpt is not None:
        print_once("loading model from {}".format(args.ckpt))
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        args.start_epoch = checkpoint['epoch']
    else:
        args.start_epoch = 0

    ctc_loss = CTCLossNM( num_classes=len(ctc_vocab))
    greedy_decoder = GreedyCTCDecoder()

    print_once("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))
    print_once("Number of parameters in decoder: {0}".format(model.jasper_decoder.num_weights()))

    N = len(data_layer)
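    # steps per epoch: the 'default' sampler divides by the global batch size, 'bucket' by the sampler length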
    if sampler_type == 'default':
        args.step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
    elif sampler_type == 'bucket':
        args.step_per_epoch = len(data_layer.sampler) // args.batch_size
    
    print_once('-----------------')
    print_once('Have {0} examples to train on.'.format(N))
    print_once('Have {0} steps / (gpu * epoch).'.format(args.step_per_epoch))
    print_once('-----------------')

    fn_lr_policy = lambda s: lr_policy(args.lr, s, args.num_epochs * args.step_per_epoch) 


    model.cuda()

    if args.optimizer_kind == "novograd":
        optimizer = Novograd(model.parameters(),
                        lr=args.lr,
                        weight_decay=args.weight_decay)
    elif args.optimizer_kind == "adam":
        optimizer = AdamW(model.parameters(),
                        lr=args.lr,
                        weight_decay=args.weight_decay)
    else:
        raise ValueError("invalid optimizer choice: {}".format(args.optimizer_kind))


    if optim_level in AmpOptimizations:
        model, optimizer = amp.initialize(
            # min_loss_scale=1.0,  # disabled: raised an error with this amp setup
            models=model,
            optimizers=optimizer,
            opt_level=AmpOptimizations[optim_level])
    
    if args.ckpt is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])

    model = model_multi_gpu(model, multi_gpu)

    train(
        data_layer=data_layer,
        data_layer_eval=data_layer_eval, 
        model=model, 
        ctc_loss=ctc_loss, 
        greedy_decoder=greedy_decoder,
        optimizer=optimizer, 
        labels=ctc_vocab, 
        optim_level=optim_level,
        multi_gpu=multi_gpu,
        fn_lr_policy=fn_lr_policy if args.lr_decay else None,
        args=args)
Example #9
def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark

    multi_gpu = args.local_rank is not None
    if multi_gpu:
        print("DISTRIBUTED with ", torch.distributed.get_world_size())

    if args.fp16:
        optim_level = Optimization.mxprO3
    else:
        optim_level = Optimization.mxprO0

    model_definition = toml.load(args.model_toml)
    dataset_vocab = model_definition['labels']['labels']
    ctc_vocab = add_blank_label(dataset_vocab)

    val_manifest = args.val_manifest
    featurizer_config = model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level

    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration
    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to if args.pad_to >= 0 else "max"

    print('model_config')
    print_dict(model_definition)
    print('feature_config')
    print_dict(featurizer_config)
    data_layer = None
    
    if args.wav is None:
        data_layer = AudioToTextDataLayer(
            dataset_dir=args.dataset_dir, 
            featurizer_config=featurizer_config,
            manifest_filepath=val_manifest,
            # sampler='bucket',
            sort_by_duration=args.sort_by_duration,
            labels=dataset_vocab,
            batch_size=args.batch_size,
            pad_to_max=featurizer_config.get('pad_to') == "max",
            shuffle=False,
            multi_gpu=multi_gpu)
    audio_preprocessor = AudioPreprocessing(**featurizer_config)

    #encoderdecoder = JasperEncoderDecoder(jasper_model_definition=jasper_model_definition, feat_in=1024, num_classes=len(ctc_vocab))
    model = RNNT(
        feature_config=featurizer_config,
        rnnt=model_definition['rnnt'],
        num_classes=len(ctc_vocab)
    )

    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=False)

    if args.ipex:
        import intel_extension_for_pytorch as ipex
        from rnn import IPEXStackTime
        model.joint_net.eval()
        data_type = torch.bfloat16 if args.mix_precision else torch.float32
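        # swap the factor-2 stack-time layer for the IPEX-optimized implementation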
        if model.encoder["stack_time"].factor == 2:
            model.encoder["stack_time"] = IPEXStackTime(model.encoder["stack_time"].factor)
        model.joint_net = ipex.optimize(model.joint_net, dtype=data_type, auto_kernel_selection=True)
        model.prediction["embed"] = model.prediction["embed"].to(data_type)
        if args.jit:
            print("running jit path")
            model.joint_net.eval()
            # trace the joint net with a dummy input sized to the joint input width
            joint_in = (model_definition['rnnt']['encoder_n_hidden'] +
                        model_definition['rnnt']['pred_n_hidden'])
            dummy_input = torch.randn(args.batch_size, 1, 1, joint_in)
            if args.mix_precision:
                with torch.cpu.amp.autocast(), torch.no_grad():
                    model.joint_net = torch.jit.trace(model.joint_net, dummy_input, check_trace=False)
            else:
                with torch.no_grad():
                    model.joint_net = torch.jit.trace(model.joint_net, dummy_input, check_trace=False)
            model.joint_net = torch.jit.freeze(model.joint_net)
    else:
        model = model.to("cpu")

    #greedy_decoder = GreedyCTCDecoder()

    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))
    if args.wav is None:
        N = len(data_layer)
        # step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_available() else torch.distributed.get_world_size())))
        step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))

        if args.steps is not None:
            print('-----------------')
            # print('Have {0} examples to eval on.'.format(args.steps * args.batch_size * (1 if not torch.distributed.is_available() else torch.distributed.get_world_size())))
            print('Have {0} examples to eval on.'.format(args.steps * args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
            print('Have {0} warm up steps / (gpu * epoch).'.format(args.warm_up))
            print('Have {0} measure steps / (gpu * epoch).'.format(args.steps))
            print('-----------------')
        else:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(N))
            print('Have {0} warm up steps / (gpu * epoch).'.format(args.warm_up))
            print('Have {0} measure steps / (gpu * epoch).'.format(step_per_epoch))
            print('-----------------')
    else:
        audio_preprocessor.featurizer.normalize = "per_feature"

    print("audio_preprocessor.normalize:", audio_preprocessor.featurizer.normalize)
    audio_preprocessor.eval()

    eval_transforms = torchvision.transforms.Compose([
        lambda xs: [x.cpu() for x in xs],
        lambda xs: [*audio_preprocessor(xs[0:2]), *xs[2:]],
        lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]],
    ])

    model.eval()
    if args.ipex:
        ipex.nn.utils._model_convert.replace_lstm_with_ipex_lstm(model)

    greedy_decoder = RNNTGreedyDecoder(len(ctc_vocab) - 1, model.module if multi_gpu else model)

    eval(
        data_layer=data_layer,
        audio_processor=eval_transforms,
        encoderdecoder=model,
        greedy_decoder=greedy_decoder,
        labels=ctc_vocab,
        args=args,
        multi_gpu=multi_gpu)
Example #10
def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    print("CUDNN BENCHMARK ", args.cudnn_benchmark)
    if not args.cpu_run:
        assert(torch.cuda.is_available())

    if args.local_rank is not None:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
    multi_gpu = args.local_rank is not None
    if multi_gpu:
        print("DISTRIBUTED with ", torch.distributed.get_world_size())

    if args.fp16:
        optim_level = 3
    else:
        optim_level = 0

    jasper_model_definition = toml.load(args.model_toml)
    dataset_vocab = jasper_model_definition['labels']['labels']
    ctc_vocab = add_ctc_labels(dataset_vocab)

    val_manifest = args.val_manifest
    featurizer_config = jasper_model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    featurizer_config["fp16"] = args.fp16
    args.use_conv_mask = jasper_model_definition['encoder'].get('convmask', True)

    if args.masked_fill is not None:
        print("{} masked_fill".format("Enabling" if args.masked_fill else "Disabling"))
        jasper_model_definition["encoder"]["convmask"] = args.masked_fill

    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration
    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to

    if featurizer_config.get('pad_to') == "max":
        featurizer_config['pad_to'] = -1
        
    print('=== model_config ===')
    print_dict(jasper_model_definition)
    print()
    print('=== feature_config ===')
    print_dict(featurizer_config)
    print()
    data_layer = None
    
    if args.wav is None:
        data_layer = AudioToTextDataLayer(
            dataset_dir=args.dataset_dir, 
            featurizer_config=featurizer_config,
            manifest_filepath=val_manifest,
            labels=dataset_vocab,
            batch_size=args.batch_size,
            pad_to_max=featurizer_config.get('pad_to') == -1,
            shuffle=False,
            multi_gpu=multi_gpu)
    audio_preprocessor = AudioPreprocessing(**featurizer_config)
    encoderdecoder = JasperEncoderDecoder(jasper_model_definition=jasper_model_definition, feat_in=1024, num_classes=len(ctc_vocab))

    if args.ckpt is not None:
        print("loading model from ", args.ckpt)

        if os.path.isdir(args.ckpt):
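            # a directory instead of a checkpoint file: nothing to load, exit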
            exit(0)
        else:
            checkpoint = torch.load(args.ckpt, map_location="cpu")
            for k in audio_preprocessor.state_dict().keys():
                checkpoint['state_dict'][k] = checkpoint['state_dict'].pop("audio_preprocessor." + k)
            audio_preprocessor.load_state_dict(checkpoint['state_dict'], strict=False)
            encoderdecoder.load_state_dict(checkpoint['state_dict'], strict=False)

    greedy_decoder = GreedyCTCDecoder()

    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))
    if args.wav is None:
        N = len(data_layer)
        step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))

        if args.steps is not None:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(args.steps * args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
            print('Have {0} steps / (gpu * epoch).'.format(args.steps))
            print('-----------------')
        else:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(N))
            print('Have {0} steps / (gpu * epoch).'.format(step_per_epoch))
            print('-----------------')

    print("audio_preprocessor.normalize:", audio_preprocessor.featurizer.normalize)
    if not args.cpu_run:
        audio_preprocessor.cuda()
        encoderdecoder.cuda()
    if args.fp16:
        encoderdecoder = amp.initialize(models=encoderdecoder,
                                        opt_level=AmpOptimizations[optim_level])

    encoderdecoder = model_multi_gpu(encoderdecoder, multi_gpu)
    audio_preprocessor.eval()
    encoderdecoder.eval()
    greedy_decoder.eval()
    
    eval(
        data_layer=data_layer,
        audio_processor=audio_preprocessor,
        encoderdecoder=encoderdecoder,
        greedy_decoder=greedy_decoder,
        labels=ctc_vocab,
        args=args,
        multi_gpu=multi_gpu)
Example #11
def run_strategy(strategy, instruments=["AUD_USD"]):
    session_id = ''.join([str(random.randint(0, 9)) for _ in range(4)])
    timestamp = datetime.datetime.strftime(datetime.datetime.now(),
                                           '%Y%m%d%H%M%S')

    # parse arguments
    args = parse_args()

    # Create a cerebro entity
    cerebro = bt.Cerebro()
    cerebro.addwriter(bt.WriterFile,
                      out=f'output/test_{timestamp}.csv',
                      csv=True,
                      rounding=2)

    # oanda method 1
    # params = {
    #     "from": "2018-01-01T00:00:00Z",
    #     "granularity": "H4",
    #     "includeFirst": True,
    #     "count": 5000,
    # }
    # df = get_historical_data(instrument, params)

    # oanda method 2 (instrument factory)
    params = {
        "from": "2018-01-01T00:00:00Z",
        "granularity": "H4",
        "to": "2019-01-01T00:00:00Z"
    }

    # crypto compare BTC USD
    # from_date = cc.to_seconds_epoch(datetime.datetime(2016, 1, 1))
    # to_date = cc.to_seconds_epoch(datetime.datetime(2018, 1, 1))
    # df = cc.get_df(from_date, to_date, time_period='histoday', coin='ETH', data_folder='data')

    # df = pd.read_pickle(r"C:\Users\vhphan\PycharmProjects\packt\Learn Algorithmic Trading\Chapter5\GOOG_data.pkl")
    # df = pd.read_csv('data/data_USD_JPY_20191118172246.csv', parse_dates=True, index_col='datetime')
    # df = df.loc['2014-01-01':'2017-01-01']

    # Pass it to the backtrader datafeed and add it to the cerebro

    # 4-hour (H4) bars: compression=240 minutes

    data = []
    for i, instrument in enumerate(instruments):
        df = get_historical_data_factory(instrument, params)
        data.append(
            bt.feeds.PandasData(dataname=df,
                                timeframe=bt.TimeFrame.Minutes,
                                compression=240))
        cerebro.adddata(data[i], name=instrument)

    # Set our desired cash start
    cerebro.broker.setcash(100000.0)
    cerebro.broker.set_shortcash(False)
    # Add a strategy
    cerebro.addstrategy(strategy)

    # Set the commission: 0.1%, i.e. 0.001 as a fraction
    cerebro.broker.setcommission(commission=0.001, leverage=1_000_000)

    # Add a FixedSize sizer according to the stake
    # cerebro.addsizer(bt.sizers.FixedSize, stake=10)

    # Print out the starting conditions
    print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())

    # Add the analyzers we are interested in
    cerebro.addanalyzer(bt.analyzers.TradeAnalyzer, _name="ta")
    cerebro.addanalyzer(bt.analyzers.SQN, _name="sqn")
    cerebro.addanalyzer(bt.analyzers.SharpeRatio, _name="sharpe")
    cerebro.addanalyzer(bt.analyzers.DrawDown, _name="draw_down")

    # Run over everything
    strategies = cerebro.run()
    first_strategy = strategies[0]

    # print the analyzers
    try:
        print_trade_analysis(first_strategy.analyzers.ta.get_analysis())
        # save_trade_analysis(first_strategy.analyzers.ta.get_analysis(), instrument,
        #                     f'output/analysis_{strategy.__name__}.csv')
        print_sharpe_ratio(first_strategy.analyzers.sharpe.get_analysis())
        print_sqn(first_strategy.analyzers.sqn.get_analysis())
        print_dict(first_strategy.analyzers.draw_down.get_analysis())
    except Exception as e:
        print(e)

    # Get final portfolio Value
    portfolio_value = cerebro.broker.getvalue()

    # Print out the final result
    print(f'Final Portfolio Value: ${portfolio_value:.2f}')
    # print('Final Portfolio Value: ${0:.2f}'.format(portvalue))

    # plt.style.use('seaborn-notebook')
    plt.style.use('tableau-colorblind10')
    plt.rc('grid', color='k', linestyle='-', alpha=0.1)
    plt.rc('legend', loc='best')
    # bo = Bokeh()
    # bo.plot_result(strategies)

    plot_args = dict(
        style='candlestick',
        # legendindloc='best',
        # legendloc='upper right',
        legenddataloc='upper right',
        grid=True,
        #  Format string for the display of ticks on the x axis
        fmt_x_ticks='%Y-%b-%d %H:%M',
        # Format string for the display of data points values
        fmt_x_data='%Y-%b-%d %H:%M',
        subplot=True,
        dpi=900,
        numfigs=1,
        # plotymargin=10.0,
        iplot=False)

    # save_plots(figs, instrument, strategy, timestamp)

    # separate plot per data feed (if there is more than one)
    if len(first_strategy.datas) > 1:
        for i in range(len(first_strategy.datas)):
            for j, d in enumerate(first_strategy.datas):
                d.plotinfo.plot = i == j  # plot only one data feed per figure; hide the others
                # first_strategy.observers.buysell[j].plotinfo.plot = i == j

            cerebro.plot(**plot_args)
Example #12
def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    print("CUDNN BENCHMARK ", args.cudnn_benchmark)
    if args.cuda:
        assert (torch.cuda.is_available())

    model_definition = toml.load(args.model_toml)
    dataset_vocab = model_definition['labels']['labels']
    ctc_vocab = add_blank_label(dataset_vocab)

    val_manifest = args.val_manifest
    featurizer_config = model_definition['input_eval']

    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to if args.pad_to >= 0 else "max"

    print('model_config')
    print_dict(model_definition)
    print('feature_config')
    print_dict(featurizer_config)

    data_layer = AudioToTextDataLayer(
        dataset_dir=args.dataset_dir,
        featurizer_config=featurizer_config,
        manifest_filepath=val_manifest,
        labels=dataset_vocab,
        batch_size=args.batch_size,
        pad_to_max=featurizer_config.get('pad_to') == "max",
        shuffle=False)
    audio_preprocessor = AudioPreprocessing(**featurizer_config)

    model = RNNT(feature_config=featurizer_config,
                 rnnt=model_definition['rnnt'],
                 num_classes=len(ctc_vocab))

    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=False)

    # model = torch.jit.script(model)

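    # force per-feature normalization for inference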
    audio_preprocessor.featurizer.normalize = "per_feature"

    if args.cuda:
        audio_preprocessor.cuda()
    audio_preprocessor.eval()

    eval_transforms = []
    if args.cuda:
        eval_transforms.append(lambda xs: [x.cuda() for x in xs])
    eval_transforms.append(lambda xs: [*audio_preprocessor(xs[0:2]), *xs[2:]])
    # transpose the feature tensor for the encoder:
    # BxFxT -> TxBxF
    eval_transforms.append(lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]])
    eval_transforms = torchvision.transforms.Compose(eval_transforms)

    if args.cuda:
        model.cuda()

    # Ideally this would be jitted as well, but this call only constructs the decoder.
    greedy_decoder = RNNTGreedyDecoder(len(ctc_vocab) - 1, model)

    eval(data_layer=data_layer,
         audio_processor=eval_transforms,
         encoderdecoder=model,
         greedy_decoder=greedy_decoder,
         labels=ctc_vocab,
         args=args)
Example #13
def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = args.cudnn_benchmark
    print("CUDNN BENCHMARK ", args.cudnn_benchmark)
    assert(torch.cuda.is_available())

    if args.local_rank is not None:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
    multi_gpu = args.local_rank is not None
    if multi_gpu:
        print("DISTRIBUTED with ", torch.distributed.get_world_size())

    if args.fp16:
        optim_level = Optimization.mxprO3
    else:
        optim_level = Optimization.mxprO0

    model_definition = toml.load(args.model_toml)
    dataset_vocab = model_definition['labels']['labels']
    ctc_vocab = add_blank_label(dataset_vocab)

    val_manifest = args.val_manifest
    featurizer_config = model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level

    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration
    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to if args.pad_to >= 0 else "max"

    print('model_config')
    print_dict(model_definition)
    print('feature_config')
    print_dict(featurizer_config)
    data_layer = None
    
    if args.wav is None:
        data_layer = AudioToTextDataLayer(
            dataset_dir=args.dataset_dir, 
            featurizer_config=featurizer_config,
            manifest_filepath=val_manifest,
            labels=dataset_vocab,
            batch_size=args.batch_size,
            pad_to_max=featurizer_config.get('pad_to') == "max",
            shuffle=False,
            multi_gpu=multi_gpu)
    audio_preprocessor = AudioPreprocessing(**featurizer_config)

    #encoderdecoder = JasperEncoderDecoder(jasper_model_definition=jasper_model_definition, feat_in=1024, num_classes=len(ctc_vocab))
    model = RNNT(
        feature_config=featurizer_config,
        rnnt=model_definition['rnnt'],
        num_classes=len(ctc_vocab)
    )

    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=False)

    #greedy_decoder = GreedyCTCDecoder()

    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))
    if args.wav is None:
        N = len(data_layer)
        step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))

        if args.steps is not None:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(args.steps * args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
            print('Have {0} steps / (gpu * epoch).'.format(args.steps))
            print('-----------------')
        else:
            print('-----------------')
            print('Have {0} examples to eval on.'.format(N))
            print('Have {0} steps / (gpu * epoch).'.format(step_per_epoch))
            print('-----------------')
    else:
        audio_preprocessor.featurizer.normalize = "per_feature"

    print("audio_preprocessor.normalize:", audio_preprocessor.featurizer.normalize)
    audio_preprocessor.cuda()
    audio_preprocessor.eval()

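    # move tensors to GPU, featurize, then transpose BxFxT -> TxBxF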
    eval_transforms = torchvision.transforms.Compose([
        lambda xs: [x.cuda() for x in xs],
        lambda xs: [*audio_preprocessor(xs[0:2]), *xs[2:]],
        lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]],
    ])

    model.cuda()
    if args.fp16:
        model = amp.initialize(
            models=model,
            opt_level=AmpOptimizations[optim_level])

    model = model_multi_gpu(model, multi_gpu)

    greedy_decoder = RNNTGreedyDecoder(len(ctc_vocab) - 1, model.module if multi_gpu else model)

    eval(
        data_layer=data_layer,
        audio_processor=eval_transforms,
        encoderdecoder=model,
        greedy_decoder=greedy_decoder,
        labels=ctc_vocab,
        args=args,
        multi_gpu=multi_gpu)
Example #14
def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    args.local_rank = os.environ.get('LOCAL_RANK', args.local_rank)
    # set up distributed training
    cpu_distributed_training = False
    if torch.distributed.is_available() and int(os.environ.get('PMI_SIZE', '0')) > 1:
        print('Distributed training with DDP')
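        # map the PMI (MPI) rank and world size onto the env vars torch.distributed reads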
        os.environ['RANK'] = os.environ.get('PMI_RANK', '0')
        os.environ['WORLD_SIZE'] = os.environ.get('PMI_SIZE', '1')
        if 'MASTER_ADDR' not in os.environ:
            os.environ['MASTER_ADDR'] = args.master_addr
        if 'MASTER_PORT' not in os.environ:
            os.environ['MASTER_PORT'] = args.port

        # Initialize the process group with ccl backend
        if args.backend == 'ccl':
            import torch_ccl
        dist.init_process_group(backend=args.backend)
        cpu_distributed_training = True
        if torch.distributed.is_initialized():
            print("Torch distributed is initialized.")
            args.rank = torch.distributed.get_rank()
            args.world_size = torch.distributed.get_world_size()
        else:
            print("Torch distributed is not initialized.")
            args.rank = 0
            args.world_size = 1

    multi_gpu = False

    optim_level = Optimization.mxprO0

    model_definition = toml.load(args.model_toml)
    dataset_vocab = model_definition['labels']['labels']
    ctc_vocab = add_blank_label(dataset_vocab)

    train_manifest = args.train_manifest
    val_manifest = args.val_manifest
    tst_manifest = args.tst_manifest
    featurizer_config = model_definition['input']
    featurizer_config_eval = model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level
    featurizer_config_eval["optimization_level"] = optim_level

    sampler_type = featurizer_config.get("sampler", 'default')
    perturb_config = model_definition.get('perturb', None)
    if args.pad_to_max:
        assert(args.max_duration > 0)
        featurizer_config['max_duration'] = args.max_duration
        featurizer_config_eval['max_duration'] = args.max_duration
        featurizer_config['pad_to'] = "max"
        featurizer_config_eval['pad_to'] = "max"
    print_once('model_config')
    print_dict(model_definition)

    if args.gradient_accumulation_steps < 1:
        raise ValueError('Invalid gradient accumulation steps parameter {}'.format(args.gradient_accumulation_steps))
    if args.batch_size % args.gradient_accumulation_steps != 0:
        raise ValueError('batch size {} is not divisible by gradient accumulation steps {}'.format(args.batch_size, args.gradient_accumulation_steps))


    preprocessor = preprocessing.AudioPreprocessing(**featurizer_config)
    if args.cuda:
        preprocessor.cuda()
    else:
        preprocessor.cpu()

    augmentations = preprocessing.SpectrogramAugmentation(**featurizer_config)
    if args.cuda:
        augmentations.cuda()
    else:
        augmentations.cpu()

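    # training pipeline: featurize on CPU, augment the spectrogram, then transpose BxFxT -> TxBxF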
    train_transforms = torchvision.transforms.Compose([
        lambda xs: [x.cpu() for x in xs],
        lambda xs: [*preprocessor(xs[0:2]), *xs[2:]],
        lambda xs: [augmentations(xs[0]),   *xs[1:]],
        lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]],
    ])

    eval_transforms = torchvision.transforms.Compose([
        lambda xs: [x.cpu() for x in xs],
        lambda xs: [*preprocessor(xs[0:2]), *xs[2:]],
        lambda xs: [xs[0].permute(2, 0, 1), *xs[1:]],
    ])

    data_layer = AudioToTextDataLayer(
                                    dataset_dir=args.dataset_dir,
                                    featurizer_config=featurizer_config,
                                    perturb_config=perturb_config,
                                    manifest_filepath=train_manifest,
                                    labels=dataset_vocab,
                                    batch_size=args.batch_size // args.gradient_accumulation_steps,
                                    multi_gpu=multi_gpu,
                                    pad_to_max=args.pad_to_max,
                                    sampler=sampler_type,
                                    cpu_distributed_training=cpu_distributed_training)

    eval_datasets = [(
        AudioToTextDataLayer(
            dataset_dir=args.dataset_dir,
            featurizer_config=featurizer_config_eval,
            manifest_filepath=val_manifest,
            labels=dataset_vocab,
            batch_size=args.eval_batch_size,
            multi_gpu=multi_gpu,
            pad_to_max=args.pad_to_max
        ),
        args.eval_frequency,
        'Eval clean',
    )]

    if tst_manifest:
        eval_datasets.append((
            AudioToTextDataLayer(
                dataset_dir=args.dataset_dir,
                featurizer_config=featurizer_config_eval,
                manifest_filepath=tst_manifest,
                labels=dataset_vocab,
                batch_size=args.eval_batch_size,
                multi_gpu=multi_gpu,
                pad_to_max=args.pad_to_max
            ),
            args.test_frequency,
            'Test other',
        ))

    model = RNNT(
        feature_config=featurizer_config,
        rnnt=model_definition['rnnt'],
        num_classes=len(ctc_vocab)
    )

    if args.ckpt is not None:
        print_once("loading model from {}".format(args.ckpt))
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        args.start_epoch = checkpoint['epoch']
    else:
        args.start_epoch = 0

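    # the blank label is the last index of the CTC vocabulary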
    loss_fn = RNNTLoss(blank=len(ctc_vocab) - 1)

    N = len(data_layer)
    if sampler_type == 'default':
        args.step_per_epoch = math.ceil(N / (args.batch_size * (1 if not torch.distributed.is_initialized() else torch.distributed.get_world_size())))
    elif sampler_type == 'bucket':
        args.step_per_epoch = len(data_layer.sampler) // args.batch_size

    print_once('-----------------')
    print_once('Have {0} examples to train on.'.format(N))
    print_once('Have {0} steps / (gpu * epoch).'.format(args.step_per_epoch))
    print_once('-----------------')

    constant_lr_policy = lambda _: args.lr
    fn_lr_policy = constant_lr_policy
    if args.lr_decay:
        pre_decay_policy = fn_lr_policy
        fn_lr_policy = lambda s: lr_decay(args.num_epochs * args.step_per_epoch, s, pre_decay_policy(s))
    if args.lr_warmup:
        pre_warmup_policy = fn_lr_policy
        fn_lr_policy = lambda s: lr_warmup(args.lr_warmup, s, pre_warmup_policy(s) )

    if args.optimizer_kind == "novograd":
        optimizer = Novograd(model.parameters(),
                        lr=args.lr,
                        weight_decay=args.weight_decay)
    elif args.optimizer_kind == "adam":
        optimizer = AdamW(model.parameters(),
                        lr=args.lr,
                        weight_decay=args.weight_decay)
    else:
        raise ValueError("invalid optimizer choice: {}".format(args.optimizer_kind))

    if args.cuda and optim_level in AmpOptimizations:
        raise NotImplementedError("amp optimization levels are not supported with ipex")

    if args.ckpt is not None:
        optimizer.load_state_dict(checkpoint['optimizer'])

    if args.ipex:
        dtype = torch.bfloat16 if args.bf16 else torch.float32
        model, optimizer = ipex.optimize(model, dtype=dtype, optimizer=optimizer)
        ipex.nn.utils._model_convert.replace_lstm_with_ipex_lstm(model)

    if args.world_size > 1:
        device_ids = None
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=device_ids)

    print_once(model)
    print_once("# parameters: {}".format(sum(p.numel() for p in model.parameters())))
    greedy_decoder = RNNTGreedyDecoder(len(ctc_vocab) - 1, model.module if multi_gpu else model)

    if args.tb_path and args.local_rank == 0:
        logger = TensorBoardLogger(args.tb_path, model.module if multi_gpu else model, args.histogram)
    else:
        logger = DummyLogger()

    train(
        data_layer=data_layer,
        model=model,
        loss_fn=loss_fn,
        greedy_decoder=greedy_decoder,
        optimizer=optimizer,
        data_transforms=train_transforms,
        labels=ctc_vocab,
        optim_level=optim_level,
        multi_gpu=multi_gpu,
        fn_lr_policy=fn_lr_policy,
        evalutaion=evaluator(model, eval_transforms, loss_fn, greedy_decoder, ctc_vocab, eval_datasets, logger),
        logger=logger,
        args=args)


def main(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    assert (args.steps is None or args.steps > 5)

    if args.cpu:
        device = torch.device('cpu')
    else:
        assert (torch.cuda.is_available())
        device = torch.device('cuda')
        torch.backends.cudnn.benchmark = args.cudnn_benchmark
        print("CUDNN BENCHMARK ", args.cudnn_benchmark)

    optim_level = 3 if args.amp else 0
    batch_size = args.batch_size

    jasper_model_definition = toml.load(args.model_toml)
    dataset_vocab = jasper_model_definition['labels']['labels']
    ctc_vocab = add_ctc_labels(dataset_vocab)

    val_manifest = args.val_manifest
    featurizer_config = jasper_model_definition['input_eval']
    featurizer_config["optimization_level"] = optim_level

    if args.max_duration is not None:
        featurizer_config['max_duration'] = args.max_duration

    # TORCHSCRIPT: Can't use mixed types. Using -1 for "max"
    if args.pad_to is not None:
        featurizer_config['pad_to'] = args.pad_to if args.pad_to >= 0 else -1

    if featurizer_config.get('pad_to') == "max":
        featurizer_config['pad_to'] = -1

    args.use_conv_mask = jasper_model_definition['encoder'].get(
        'convmask', True)
    if args.use_conv_mask and args.torch_script:
        print(
            'WARNING: Masked convs currently not supported for TorchScript. Disabling.'
        )
        jasper_model_definition['encoder']['convmask'] = False

    print('model_config')
    print_dict(jasper_model_definition)
    print('feature_config')
    print_dict(featurizer_config)

    data_layer = AudioToTextDataLayer(
        dataset_dir=args.dataset_dir,
        featurizer_config=featurizer_config,
        manifest_filepath=val_manifest,
        labels=dataset_vocab,
        batch_size=batch_size,
        pad_to_max=featurizer_config.get('pad_to') == -1,
        shuffle=False,
        multi_gpu=False)

    audio_preprocessor = AudioPreprocessing(**featurizer_config)

    encoderdecoder = JasperEncoderDecoder(
        jasper_model_definition=jasper_model_definition,
        feat_in=1024,
        num_classes=len(ctc_vocab))

    if args.ckpt is not None:
        print("loading model from ", args.ckpt)
        checkpoint = torch.load(args.ckpt, map_location="cpu")
        for k in audio_preprocessor.state_dict().keys():
            checkpoint['state_dict'][k] = checkpoint['state_dict'].pop(
                "audio_preprocessor." + k)
        audio_preprocessor.load_state_dict(checkpoint['state_dict'],
                                           strict=False)
        encoderdecoder.load_state_dict(checkpoint['state_dict'], strict=False)

    greedy_decoder = GreedyCTCDecoder()

    # print("Number of parameters in encoder: {0}".format(model.jasper_encoder.num_weights()))

    N = len(data_layer)
    step_per_epoch = math.ceil(N / args.batch_size)

    print('-----------------')
    if args.steps is None:
        print('Have {0} examples to eval on.'.format(N))
        print('Have {0} steps / (epoch).'.format(step_per_epoch))
    else:
        print('Have {0} examples to eval on.'.format(args.steps *
                                                     args.batch_size))
        print('Have {0} steps / (epoch).'.format(args.steps))
    print('-----------------')

    audio_preprocessor.to(device)
    encoderdecoder.to(device)

    if args.amp:
        encoderdecoder = amp.initialize(models=encoderdecoder,
                                        opt_level='O' + str(optim_level))

    eval(data_layer=data_layer,
         audio_processor=audio_preprocessor,
         encoderdecoder=encoderdecoder,
         greedy_decoder=greedy_decoder,
         labels=ctc_vocab,
         device=device,
         args=args)