def main():

  data_dir = '/tmp/mnist'
  model_dir = '/tmp/model'
  batch_size = 128

  delete_dir(model_dir)

  data_format = ('channels_first' if tf.test.is_built_with_cuda() else 'channels_last')

  params = {
      'data_format': data_format,
      'learning_rate': 1e-4
  }

  estimator = tf.estimator.Estimator(
      model_fn=model_fn,
      model_dir=model_dir,
      params=params)

  def train_input_fn():
    ds = dataset.train(data_dir)
    ds = ds.cache()
    ds = ds.shuffle(buffer_size=50000)
    ds = ds.batch(batch_size)
    ds = ds.repeat(1)
    return ds      

  def eval_input_fn():
    ds = dataset.test(data_dir)
    ds = ds.batch(batch_size)
    return ds

  print('Train model')
  train_hooks = [tf.train.LoggingTensorHook(tensors=['cross_entropy', 'train_accuracy'], every_n_iter=20)]
  estimator.train(input_fn=train_input_fn, hooks=train_hooks)
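Every snippet on this page calls a project-specific delete_dir helper before writing output. Its implementation is not shown here; a minimal sketch of what such a helper typically does (an assumption, not the projects' actual code) is:

import os
import shutil

def delete_dir(dir_path):
    # Remove dir_path and everything under it; a missing directory is not an error.
    if os.path.isdir(dir_path):
        shutil.rmtree(dir_path)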
Example #2
def create_data(output_dir,
                source_train,
                source_test,
                source_dev,
                gen_batch=64,
                length=128,
                device=0):
    delete_dir(output_dir)
    os.makedirs(output_dir)
    for datatype, source_path in zip(["train", "test", "dev"],
                                     [source_train, source_test, source_dev]):
        with open(source_path) as f:
            data = f.read()
        data = data.split("<|endoftext|>")
        data = data[1:-1]
        len_ = len(data)
        data = [text.replace("\n", "").replace("\t", "") for text in data]
        steps = int(len_ / gen_batch) + 1

        texts = []

        for _ in trange(steps):
            texts += generate_text(
                model,
                tokenizer,
                prompt="",
                length=length,
                num_return_sequences=gen_batch,
                device=device,
            )
        texts = texts[:len_]

        # Cleaning the strings:
        texts = [text.replace("\n", "").replace("\t", "") for text in texts]

        # Interleave the real sentences (label 1) and generated ones (label 0) in random order.
        total_data = []
        labels = [0] * len_ + [1] * len_

        import random
        random.shuffle(labels)
        for label in labels:
            if label:
                total_data.append(data.pop())
            else:
                total_data.append(texts.pop())
        df = pd.DataFrame({"sentence": total_data, "label": labels})
        if datatype == "test":
            df["sentence"].to_csv(join(output_dir, "test.tsv"),
                                  sep="\t",
                                  index_label="index")
            df["label"].to_csv(join(output_dir, "test_answers.tsv"),
                               sep="\t",
                               index_label="index")
        else:
            df.to_csv(join(output_dir, datatype + ".tsv"),
                      sep="\t",
                      index=False)
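A hypothetical call (the file paths and output directory are placeholders; create_data also assumes module-level model and tokenizer objects for generate_text):

create_data("detector_data",
            source_train="data/train.txt",
            source_test="data/test.txt",
            source_dev="data/dev.txt",
            gen_batch=64,
            length=128,
            device=0)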
Example #3
    def tearDownClass(self):
        """
        clean up
        :return:
        """
        self.logging.info('--> TestFilesystems.tearDownClass()')
        self.logging.debug('delete the partition created in setup class')
        utils.delete_dir(self.mountpt)
        utils.delete_dir(self.nfs_mount_pt)
        utils.del_eckd_partition(self.dev)
        self.logging.info('<-- TestFilesystems.tearDownClass()')
Example #4
File: run_rouge.py  Project: JudeLee19/APES
def rouge(preds_file, targets_file):
    temp_targets_dir, temp_preds_dir = './temp_targets/', './temp_preds/'
    summaries_to_rouge_format(targets_file, temp_targets_dir, "targets")
    summaries_to_rouge_format(preds_file, temp_preds_dir, "preds")

    os.system("""python -m rouge.rouge \
	           --target_filepattern={}*.targets \
	           --prediction_filepattern={}*.preds \
	           --output_filename=rouge_scores.csv""".format(
        temp_targets_dir, temp_preds_dir))

    delete_dir(temp_targets_dir)
    delete_dir(temp_preds_dir)
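A hypothetical call (file names are placeholders); the function shells out to python -m rouge.rouge and leaves the scores in rouge_scores.csv in the working directory:

rouge("preds.txt", "targets.txt")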
Example #5
    def download(self):

        utils.delete_dir(self.base_path)

        if not self.dict_result:
            return

        info_df = pd.DataFrame(self.dict_result.values(),
                               index=self.dict_result.keys())
        if len(info_df) > 0:
            dir_path = f'{self.base_path}/{settings.xbrl_dir_name}{settings.since}/'
            self.__make_directory(dir_path)
            self.__download_all_xbrl_files(info_df, dir_path)
Example #6
def train(
    dataloader,
    output_dir,
    epochs=6,
    log_steps=200,
    learning_rate=5e-6,
    fp16=True,
    debug_stop=False,
    device=0,
    optimizer=None,
    lr_scheduler=None,
):
    if optimizer is None:
        optimizer, lr_scheduler = create_optimizer_and_scheduler(
            model, dataloader, epochs, learning_rate=learning_rate)
    delete_dir(output_dir)
    for epoch in range(epochs):
        total_loss = 0
        for step, inputs in enumerate(tqdm(dataloader), 1):
            model.train()
            _prepare_inputs(inputs, device)

            if fp16:
                total_loss += fp16_train_step(model, inputs) / log_steps
                fp16_optimizer_step(model, optimizer)
            else:
                total_loss += train_step(model, inputs) / log_steps
                optimizer_step(model, optimizer)

            lr_scheduler.step()

            model.zero_grad()

            # Logging
            if step % log_steps == 0:
                print(
                    f"step: {step} (lr = {optimizer.param_groups[0]['lr']}), loss: {total_loss}"
                )

                total_loss = 0
                if debug_stop:
                    break
        eval_loss = evaluate(testloader, fp16=fp16, device=device)
        perplexity = float(math.exp(eval_loss))
        print("perplexity:", perplexity)
        save_dir = '{}_ep{}_perplexity{}'.format(int(time()), epoch,
                                                 perplexity)
        save(model, save_dir, optimizer, lr_scheduler, output_dir)
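train() leans on several helpers that are not shown on this page (create_optimizer_and_scheduler, fp16_train_step, save, and a module-level testloader). As one example, _prepare_inputs presumably just moves the batch onto the target GPU; a minimal sketch under that assumption:

import torch

def _prepare_inputs(inputs, device):
    # Move every tensor in the batch dict onto the requested CUDA device, in place.
    target = torch.device(f"cuda:{device}") if isinstance(device, int) else torch.device(device)
    for key, value in inputs.items():
        if isinstance(value, torch.Tensor):
            inputs[key] = value.to(target)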
Example #7
def main():

    data_dir = '/tmp/mnist'
    model_dir = '/tmp/model'
    batch_size = 128
    use_dataset = True

    delete_dir(model_dir)

    def train_input_fn():
        ds = dataset.train(data_dir)
        ds = ds.repeat(None)
        ds = ds.shuffle(buffer_size=50000)
        ds = ds.batch(batch_size)
        return ds

    def eval_input_fn():
        ds = dataset.test(data_dir)
        ds = ds.batch(batch_size)
        return ds

    model_params = {
        'learning_rate': 1e-4,
        'hidden_size': 512,
        'keep_rate': 0.5
    }
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=model_dir,
                                       params=model_params)

    for _ in range(100):

        print('Train model')
        train_hooks = [
            tf.train.LoggingTensorHook(
                tensors=['global_step', 'cross_entropy'], every_n_iter=1),
            SampleHook(loss_tensor='cross_entropy', checkpoint_path=model_dir)
        ]
        estimator.train(input_fn=train_input_fn, steps=200, hooks=train_hooks)

        print('Evaluate model')
        eval_hooks = [
            tf.train.LoggingTensorHook(tensors=['global_step'], every_n_iter=1)
        ]
        eval_results = estimator.evaluate(input_fn=eval_input_fn,
                                          hooks=eval_hooks)
        print('Eval loss: %s' % eval_results['loss'])
        print('Eval accuracy: %s' % eval_results['accuracy'])
Example #8
    def uploadPackage(self, request_iterator, context):
        """
        Upload an app package
        :param request_iterator:
        :param context:
        :return:
        """
        LOG.info('receive upload package msg...')
        res = UploadPackageResponse(status=utils.FAILURE)

        parameters = UploadPackageRequest(request_iterator)

        host_ip = validate_input_params(parameters)
        if host_ip is None:
            parameters.delete_tmp()
            return res

        app_package_id = parameters.app_package_id
        if app_package_id is None:
            LOG.debug('appPackageId is required')
            parameters.delete_tmp()
            return res
        app_package_path = utils.APP_PACKAGE_DIR + '/' + host_ip + '/' + parameters.app_package_id
        if utils.exists_path(app_package_path):
            LOG.debug('app package already exists')
            parameters.delete_tmp()
            return res
        utils.create_dir(app_package_path)
        try:
            LOG.debug('unzip package')
            with zipfile.ZipFile(parameters.tmp_package_file_path) as zip_file:
                namelist = zip_file.namelist()
                for file in namelist:
                    zip_file.extract(file, app_package_path)
            pkg = CsarPkg(app_package_path)
            pkg.translate()
            res.status = utils.SUCCESS
        except Exception as exception:
            LOG.error(exception, exc_info=True)
            utils.delete_dir(app_package_path)
        finally:
            parameters.delete_tmp()
        return res
Example #9
def backup_info(request, cluster_id, path=None):
    if path is None:
        path = "/home/backup/"
    path = u.join_path(path, cluster_id)
    try:
        u.create_dir(path)
        ct_id, ukp_id, default_image_id, instance_ids = save_cluster_info(request, cluster_id, path)
        node_groups_template_ids = save_cluster_template_info(request, ct_id, path)
        flavor_ids, image_ids, security_group_ids = save_node_groups_info(request, node_groups_template_ids, path)
        image_ids = append_to_list(default_image_id, image_ids)
        save_key_pair(request, ukp_id, path)
        save_flavors_info(request, flavor_ids, path)
        save_security_group_info(request, security_group_ids, path)
        save_images_tag(request, image_ids, path)
        #backup_instance(request, instance_ids, path)
    except Exception:
        u.delete_dir(path)
        return False
    else:
        return True
Example #10
def create_dataset(
    data_source,
    output_dir,
    required_in_title=None,
    upvoted=False,
    skip_first=0,
    bad_words=None,
):
    with open(data_source) as f:
        data = json.load(f)
    delete_dir(output_dir)
    makedirs(output_dir)
    data = [x for x in data if ("selftext" not in x) or (not x["selftext"])]
    if required_in_title is not None:
        for required in required_in_title:
            data = [x for x in data if required in x["title"].lower()]
    if upvoted:
        data = [x for x in data if x["score"] > 1]
    data = [x["title"] for x in data]
    data = data[skip_first:]  # Drop the oldest skip_first examples
    # Trying to remove most prompts that directly ask to write or describe something
    if bad_words is not None:
        for word in bad_words:
            data = [x for x in data if (word not in x.lower())]
    data = [remove_tags(x) for x in data]
    data = list(set(data))  # removing reposts
    split_1 = int(len(data) * .9)
    split_2 = int(len(data) * .95)
    random.shuffle(data)

    data_train = data[:split_1]
    print("Train dataset of length", len(data_train))
    data_test = data[split_1:split_2]
    print("Test dataset of length", len(data_test))
    data_val = data[split_2:]
    print("Dev dataset of length", len(data_val))

    write(join(output_dir, "train.txt"), data_train)
    write(join(output_dir, "test.txt"), data_test)
    write(join(output_dir, "dev.txt"), data_val)
    print("data saved in", join(getcwd(), output_dir))
Example #11
    def deletePackage(self, request, context):
        """
        Delete an app package
        :param request:
        :param context:
        :return:
        """
        LOG.info('receive delete package msg...')
        res = DeletePackageResponse(status=utils.FAILURE)

        host_ip = validate_input_params(request)
        if host_ip is None:
            return res

        app_package_id = request.appPackageId
        if not app_package_id:
            return res

        app_package_path = utils.APP_PACKAGE_DIR + '/' + host_ip + '/' + app_package_id
        utils.delete_dir(app_package_path)

        res.status = utils.SUCCESS
        return res
Example #12
def draw_document_distribution(trending_topics, count_topics, total, domain):
    domain_nor = domain.replace(u' ', u'-').lower()
    output_dir = os.path.join(u'static', domain_nor)
    utils.delete_dir(output_dir)
    utils.mkdir(output_dir)
    objects = []
    for k in xrange(len(count_topics)):
        # With 50 or more topics, only label the bars whose index appears in
        # trending_topics; any other index (lookup fails) gets an empty label.
        try:
            if len(count_topics) >= 50:
                _ = trending_topics[k]
            objects.append(unicode(k))
        except:
            objects.append(u'')
    performance = map(lambda x: x * 100, count_topics)
    y_pos = np.arange(len(objects))
    plt.bar(y_pos, performance, align='center', alpha=0.5)
    plt.xticks(y_pos, objects)
    plt.ylabel('percent')
    plt.title('Document distribution by topics - num_docs = %d' % (total))
    # plt.show()
    plt.tight_layout(pad=0.4, w_pad=1.4, h_pad=1.0)

    plt.savefig(os.path.join(output_dir, 'documents_distribution.png'),
                dpi=100)
Example #13
    def tearDown(self):
        utils.delete_dir("")
Example #14
def get_download(branch, download_store_path, list_products):
    site_url = 'http://lisbon-build.pentaho.com/hosted/' + branch + '/latest/'
    file_build_info = 'build.info'

    ##
    # READ LATEST DOWNLOAD BUILD FROM LOCAL FILE
    # 1. read local file
    # 2. read latest download build
    ##
    # Download all files
    # Download build info file - to know the current version
    download_build_info = site_url + file_build_info
    log.debug(download_build_info)
    try:
        response = urllib.request.urlopen(download_build_info, timeout=50)
    except Exception as e:
        log.debug('Something went wrong.')
        log.debug(e)
        sys.exit('Something went wrong. Download Build.Info.')

    data = response.read().decode('utf-8')
    data = data.replace('\n', ' ')
    log.debug('Content: ' + data)
    build_info_version = data.split(' ')[0]

    downloaded_file = []
    if list_products:
        list_download_artifacts = list_products
    else:
        list_download_artifacts = glist_download_artifacts

    log.debug(list_products)
    log.debug(list_download_artifacts)

    for filename in list_download_artifacts:
        try:
            require_version_check = False
            need_download_new_build = True
            log.debug('-------')
            log.debug('DOWNLOADING [' + filename + ']')
            log.debug('-------')
            latest_version = 0

            # 1. read local file
            latest_build_file_path = os.path.join(os.environ['TMP'],
                                                  'pentahobuildinfo', branch,
                                                  filename)
            latest_build_file_path = os.path.normpath(latest_build_file_path)
            os.makedirs(latest_build_file_path, exist_ok=True)
            latest_build_file_path = os.path.join(
                os.path.normpath(latest_build_file_path),
                'last_download_build.txt')

            # 1.1. If the local file doesn't exist, then we need to download a new build
            if not os.path.isfile(latest_build_file_path):
                log.debug('File does not exist.')
            else:
                require_version_check = True
                log.info('File exist: [' + latest_build_file_path + '].')
                # Need to read file and get version number.
                with open(latest_build_file_path, "r") as file_handler:
                    latest_version = file_handler.read().replace('\n',
                                                                 '').replace(
                                                                     ' ', '')
                    log.debug('Previous saved build was [' + latest_version +
                              ']')

            # 2. Download the "Build.info".
            log.debug("Require New Version Checker [" +
                      str(require_version_check) + '].')
            log.debug("Download Build Info version [" + build_info_version +
                      '].')
            log.debug("Last downloaded version [" + str(latest_version) + '].')

            if require_version_check and (latest_version
                                          == build_info_version):
                log.debug(
                    'We do not need to download a new version. Latest [' +
                    latest_version + '] and Current [' + build_info_version +
                    ']')
                need_download_new_build = False

            if need_download_new_build:
                download_fail = False
                log.debug('We are going to download a new version [' +
                          build_info_version + ']')

                # Create directory if does not exist or delete all contents of it
                download_store_path = os.path.realpath(download_store_path)
                log.debug('Writing content to this directory [' +
                          download_store_path + ']')
                if not os.path.exists(download_store_path):
                    log.debug('Create directory [' + download_store_path + ']')
                    os.makedirs(download_store_path)

                # Let's download the files.
                store_directory = os.path.join(download_store_path, filename)

                # Going to delete the folder of the artifact and the zip file
                tmp_store_filename = os.path.join(download_store_path,
                                                  filename) + '.zip'
                log.debug('Deleting previous artifacts.')
                # delete the folder - unzipped previously e.g. pentaho-server-ce
                log.debug('Delete store directory [' + store_directory + ']')
                utils.delete_dir(store_directory)
                # delete the download zip previously e.g. pentaho-server-ce.zip
                log.debug('Delete zip file [' + tmp_store_filename + ']')
                utils.delete_file(tmp_store_filename)
                download_url = site_url + filename + '.zip'
                log.debug('Downloading file: [' + filename + '] [' +
                          download_url + '].')

                wget.download(download_url, download_store_path)
                log.debug('Download completed!')

                # Need to save in the file the download version
                # We are using the CONTENT MANAGER that close the stream for us
                with open(latest_build_file_path, "w+") as text_file:
                    print(build_info_version, file=text_file)
                    log.debug('Save on file [' + latest_build_file_path +
                              '] the latest build version [' +
                              build_info_version + '].')

                utils.unzip_single_file(download_store_path, filename)
        except Exception as e:
            download_fail = True
            log.exception(e)
            break
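A hypothetical call with placeholder arguments; the function also expects a TMP environment variable, the module-level log, wget and utils helpers, and a glist_download_artifacts fallback list:

get_download('master', 'downloads', ['pentaho-server-ce'])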
Example #15
File: test_all.py  Project: pmuellr/slowebs
suite  = unittest.TestSuite()
result = unittest.TestResult()
runner = unittest.TextTestRunner(verbosity=2)
    
#-------------------------------------------------------------------
# run tests in the listed modules
#-------------------------------------------------------------------

moduleNames = """
    test_list
    test_redirect
    test_read
    test_write
    test_cross_origin
    test_browser
""".split()

modules = [__import__(moduleName) for moduleName in moduleNames]

for module in modules: 
    suite.addTest(unittest.defaultTestLoader.loadTestsFromModule(module))

server = utils.Server()
server.start()

try:
    runner.run(suite)
finally:
    server.stop()
    utils.delete_dir("")
    
Example #16
# start
root_dir = os.getcwd()
token = read_file_contents('token')
github = 'https://api.github.com/repos/KanoComputing/{}/tarball/{}'

for name, branch in repos_selected:
    url = github.format(name, branch)
    dir_str = '{}___{}'.format(name, branch)
    dir_path = os.path.join(root_dir, dir_str)
    debfile = ''
    print

    if args.down:
        print 'Downloading {} ...'.format(dir_str)

        delete_dir(dir_path)
        ensure_dir(dir_path)
        os.chdir(dir_path)

        if not token:
            cmd = 'curl -L -v -o tmp.tgz {url}'.format(url=url)
        else:
            cmd = 'curl -H "Authorization: token {token}" -L -v -o tmp.tgz {url}'.format(
                token=token, url=url)
        _, e, _ = run_cmd(cmd)

        if args.verbose:
            print e

        if '< Status: 302 Found' in e:
            print 'OK'
Example #17
    def reset(self):
        utils.delete_dir(self.result_dir)
Example #18
EXCLUDE_DIRS = [
    '.vscode', 'node_modules', 'log', '.git', 'dist', '.idea', '.nvmrc',
    '.DS_Store', '__pycache__', 'coverage', '.nyc_output'
]
EXCLUDE_FILES = [
    '.gitignore', '.cfignore', 'package-lock.json', '.directory', 'README.md',
    '.nyc_output'
]
EXCLUDE_EXTENSION = [
    '.jpg', '.png', '.jpeg', '.md', '.swap', '.opts', '.log', '.svg', '.pdf',
    '.otf', '.ttf', '.eot', '.woff2', '.woff', '.gif', '.psd', '.xls', '.xlsx'
]
CLONE_DIR = './repositories'
OUTPUT_DIR = './output/'

delete_dir(dir_path=CLONE_DIR)
delete_dir(dir_path=OUTPUT_DIR)
create_dir(dir_path=CLONE_DIR)
create_dir(dir_path=OUTPUT_DIR)

for project in PROJECTS:
    project_base_dirs = []
    project_base_files = []
    project_exclude_files = EXCLUDE_FILES + project["exclude_extension"]
    project_exclude_extension = EXCLUDE_EXTENSION + project["exclude_extension"]

    clone_repository_dir = os.path.join(CLONE_DIR, project["name"])
    git_clone(CLONE_DIR, project["repository"])
    for item in os.listdir(clone_repository_dir):
        item_path = os.path.join(clone_repository_dir, item)
        if os.path.isdir(item_path):
Example #19
    def delete_tmp(self):
        """
        Delete the temporary files
        """
        utils.delete_dir(self._tmp_package_dir)
Example #20
def main():

    data_dir = '/tmp/mnist'
    model_dir = '/tmp/model'
    batch_size = 128
    use_dataset = True

    delete_dir(model_dir)

    if use_dataset:
        # Use `tf.data.Dataset` to read train and eval data.
        def train_input_fn():
            ds = dataset.train(data_dir)
            ds = ds.cache()
            ds = ds.shuffle(buffer_size=50000)
            ds = ds.batch(batch_size)
            ds = ds.repeat(1)
            return ds

        def eval_input_fn():
            ds = dataset.test(data_dir)
            ds = ds.batch(batch_size)
            return ds

    else:
        # Use `numpy_input_fn()` to read train and evaluation data
        # from Numpy arrays.
        mnist = input_data.read_data_sets(data_dir)
        train_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'X': mnist.train.images},
            y=mnist.train.labels.astype(np.int32),
            num_epochs=1,
            batch_size=batch_size,
            shuffle=True)
        eval_input_fn = tf.estimator.inputs.numpy_input_fn(
            x={'X': mnist.test.images},
            y=mnist.test.labels.astype(np.int32),
            num_epochs=1,
            batch_size=batch_size,
            shuffle=False)

    model_params = {
        'learning_rate': 1e-4,
        'hidden_size': 512,
        'keep_rate': 0.5
    }
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       model_dir=model_dir,
                                       params=model_params)

    print('Train model')
    train_hooks = [
        tf.train.LoggingTensorHook(
            tensors=['learning_rate', 'cross_entropy', 'train_accuracy'],
            every_n_iter=20)
    ]
    estimator.train(input_fn=train_input_fn, hooks=train_hooks)

    print('Evaluate model')
    eval_results = estimator.evaluate(input_fn=eval_input_fn)
    print('Eval loss: %s' % eval_results['loss'])
    print('Eval accuracy: %s' % eval_results['accuracy'])

    print('Generate some predictions:')
    preds = estimator.predict(input_fn=eval_input_fn)
    for _ in range(5):
        print(next(preds)['class'])
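None of the Estimator examples on this page include the model_fn they pass to tf.estimator.Estimator. A minimal TF 1.x sketch consistent with the params and logged tensor names used above ('learning_rate', 'cross_entropy', 'train_accuracy', and the 'accuracy' eval metric) is shown below; it is an assumption, not the original model:

import tensorflow as tf

def model_fn(features, labels, mode, params):
    # Accept both the Dataset path (a plain image tensor) and numpy_input_fn ({'X': images}).
    if isinstance(features, dict):
        features = features['X']
    images = tf.reshape(tf.cast(features, tf.float32), [-1, 784])

    hidden = tf.layers.dense(images, params['hidden_size'], activation=tf.nn.relu)
    if mode == tf.estimator.ModeKeys.TRAIN:
        hidden = tf.nn.dropout(hidden, keep_prob=params['keep_rate'])
    logits = tf.layers.dense(hidden, 10)
    predictions = {'class': tf.argmax(logits, axis=1)}

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
    # Name the tensors so LoggingTensorHook can find them by name.
    tf.identity(loss, name='cross_entropy')
    accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions['class'])
    tf.identity(accuracy[1], name='train_accuracy')

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss,
                                          eval_metric_ops={'accuracy': accuracy})

    # Expose the (constant) learning rate under a name the train hooks can log.
    tf.constant(params['learning_rate'], name='learning_rate')
    optimizer = tf.train.AdamOptimizer(params['learning_rate'])
    train_op = optimizer.minimize(loss,
                                  global_step=tf.train.get_or_create_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)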
Example #21
    def delete(self, name):
        name = normalize_name(name)
        dataset_path = self.datasets_dir + name
        delete_dir(dataset_path)
        return
Example #22
root_dir = os.getcwd()
token = read_file_contents('token')
github = 'https://api.github.com/repos/KanoComputing/{}/tarball/{}'


for name, branch in repos_selected:
    url = github.format(name, branch)
    dir_str = '{}___{}'.format(name, branch)
    dir_path = os.path.join(root_dir, dir_str)
    debfile = ''
    print

    if args.down:
        print 'Downloading {} ...'.format(dir_str)

        delete_dir(dir_path)
        ensure_dir(dir_path)
        os.chdir(dir_path)

        if not token:
            cmd = 'curl -L -v -o tmp.tgz {url}'.format(url=url)
        else:
            cmd = 'curl -H "Authorization: token {token}" -L -v -o tmp.tgz {url}'.format(token=token, url=url)
        _, e, _ = run_cmd(cmd)

        if args.verbose:
            print e

        if '< Status: 302 Found' in e:
            print 'OK'
        else:
Example #23
def kaldi_stt(file_path,
              transcript=None,
              std_bash=False,
              tmp_dir=TMP_DIR,
              model_dir=s5_path):
    """ Kaldi speech to text decoding """
    nj = 1
    beam = 14
    lat_beam = 6
    sr = 8000

    file_path_old = file_path[:-4] + ".old.wav"

    os.rename(file_path, file_path_old)

    speaker_id = file_path.rsplit("/", 1)[1]
    speaker_dir = os.path.join(tmp_dir, speaker_id)
    wav_data_dir = os.path.join(speaker_dir, "data")
    wav_path = os.path.join(wav_data_dir, speaker_id)

    create_dir(wav_data_dir)

    transform_audio_file(file_path_old, wav_path, rate=sr)

    wavscp_path = os.path.join(speaker_dir, "wav.scp")
    with open(wavscp_path, "w") as scp_file:
        scp_file.write(f"{speaker_id} {wav_path}\n")

    utt2spk_path = os.path.join(speaker_dir, "utt2spk")
    with open(utt2spk_path, "w") as scp_file:
        scp_file.write(f"{speaker_id} {speaker_id}\n")

    spk2utt_path = os.path.join(speaker_dir, "spk2utt")
    with open(spk2utt_path, "w") as scp_file:
        scp_file.write(f"{speaker_id} {speaker_id}\n")

    if transcript:
        text_path = os.path.join(speaker_dir, "text")
        with open(text_path, "w") as scp_file:
            scp_file.write(f"{speaker_id} {transcript}\n")

    speaker_dir = f"{TMP_DIR}/{speaker_id}"
    decode_dir = f"{model_dir}/exp/tri5_ali/{speaker_id}"

    export = f"""
        export nj={nj}
        export beam={beam}
        export lat_beam={lat_beam}
        export KALDI_ROOT={KALDI_PATH}
        export s5_path={model_dir}
        export decode_dir="{TMP_DIR}/{speaker_id}"
        export model_dir="{model_dir}/exp/tri5_ali"
        export decode_res_dir="{model_dir}/exp/tri5_ali/{speaker_id}"

        [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
        export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
        [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
        . $KALDI_ROOT/tools/config/common_path.sh
        export LC_ALL=C
    """
    make_features = """
        cd $s5_path
        steps/make_mfcc.sh --nj $nj $decode_dir $decode_dir/log/mfcc
        steps/compute_cmvn_stats.sh $decode_dir $decode_dir/log/mfcc
    """
    if transcript:
        decode = """
            if [ -d decode_res_dir ]; then
                rm -rf $decode_res_dir
            fi
            steps/decode.sh --skip-scoring false --beam $beam --lattice-beam $lat_beam --nj $nj $model_dir/graph/ $decode_dir $decode_res_dir
        """
    else:
        decode = """
            if [ -d decode_res_dir ]; then
                rm -rf $decode_res_dir
            fi
            steps/decode.sh --skip-scoring false --beam $beam --lattice-beam $lat_beam --nj $nj $model_dir/graph/ $decode_dir $decode_res_dir
        """
    extract_res = """
    find  ${decode_res_dir}  -name lat*.gz -exec bash -c \
    'lattice-best-path  --acoustic-scale=0.085 --word-symbol-table=exp/tri5_ali/graph/words.txt ark:"gunzip -c {} |" ark,t:${decode_res_dir}/one-best.tra_$(basename ${0/gz/txt})' {} \;
    cat ${decode_res_dir}/one-best*.txt >> ${decode_res_dir}/all.txt
    utils/int2sym.pl -f 2- exp/tri5_ali/graph/words.txt ${decode_res_dir}/all.txt > ${decode_res_dir}/best_hyp.txt
    """

    p = subprocess.Popen(export + make_features + decode + extract_res,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()

    if transcript:
        with open(
                os.path.join(model_dir, "exp", "tri5_ali", speaker_id,
                             'scoring_kaldi', 'best_wer')) as file_:
            wer = file_.readlines()
            wer = re.findall(r"(.*)\]\s+", wer[0])[0].strip() + ']'
        with open(
                os.path.join(model_dir, "exp", "tri5_ali", speaker_id,
                             'scoring_kaldi', 'wer_details',
                             'per_utt')) as file_:
            wer_details = file_.readlines()
            wer_details = "".join(wer_details)

    tb = "\n\n".join([stdout.decode(), stderr.decode()])
    if std_bash:
        print(tb)

    with open(f"{decode_dir}/best_hyp.txt", 'r', encoding='utf-8') as f:
        res = f.readlines()

    trash_folders = (speaker_dir, decode_dir)
    for folder in trash_folders:
        delete_dir(folder)

    try:
        res = res[0].split(" ", 1)[1], "Success!"
    except IndexError:
        res = tb, "Failed!"
        logging.error(f"Kaldi traceback:\n{tb}")
    # return res
    if transcript:
        return res, wer, wer_details
    else:
        return res
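Hypothetical calls with placeholder paths; without a reference transcript the function returns (hypothesis, status), and with one it additionally returns Kaldi's WER summary and per-utterance details:

hyp, status = kaldi_stt("/tmp/audio/utt0001.wav")
(hyp, status), wer, wer_details = kaldi_stt("/tmp/audio/utt0002.wav",
                                            transcript="hello world")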
Example #24
    def reset_all(self):
        utils.delete_dir(self.domain_output_dir)
Example #25
    def setUp(self):
        print
        self.client = utils.Client()
        utils.delete_dir("")
        utils.create_dir("")