Exemplo n.º 1
0
def evaluate_abx(args):
    """Run the ABX evaluation stages and print the average ABX score.

    Every stage writes its output file inside ``args.out_dir`` (created if
    needed) and is skipped when that file already exists, so a previous
    run's intermediate results are reused.

    Attributes read from ``args``: ``out_dir``, ``feature_dir``,
    ``task_path`` and ``task_type``.
    """
    workdir = Path(args.out_dir)
    workdir.mkdir(parents=True, exist_ok=True)

    feats_file = workdir / "features.features"
    dist_file = workdir / "data.distance"
    scores_file = workdir / "data.score"
    csv_file = workdir / "data.csv"

    # Stage 1: convert the feature directory into a single features file.
    if not feats_file.exists():
        convert(args.feature_dir, h5_filename=str(feats_file))

    # Stage 2: distances for the task, using DTW + cosine.
    if not dist_file.exists():
        distances.compute_distances(
            str(feats_file),
            "features",
            str(args.task_path),
            str(dist_file),
            dtw_cosine_distance,
            normalized=True,
            n_cpu=6,
        )

    # Stage 3: scores computed from the distances.
    if not scores_file.exists():
        score.score(str(args.task_path), str(dist_file), str(scores_file))

    # Stage 4: collapse the scores into a csv file.
    if not csv_file.exists():
        analyze.analyze(
            str(args.task_path), str(scores_file), str(csv_file))

    # The average is always recomputed from the csv file.
    print("average abx: {:.3f}".format(
        average_abx(str(csv_file), args.task_type)))
Exemplo n.º 2
0
def _abx(features_path, temp_dir, task, task_type, load_fun, distance,
         normalized, njobs, log):
    """Runs the ABX pipeline and returns the averaged score

    The pipeline converts the features, computes the distances for the
    task, scores them, collapses the scores into a csv file and
    averages that file. Every intermediate file is written in
    `temp_dir`.

    Parameters
    ----------
    features_path : source of the features, forwarded to `convert`
    temp_dir : directory receiving the intermediate files
    task : ABX task, forwarded to the distances, score and analyze steps
    task_type : used to name the intermediate files, forwarded to `_average`
    load_fun : forwarded to `convert` as its `load` argument
    distance : one of 'cosine', 'KL' or 'levenshtein'
    normalized : forwarded to `compute_distances`
    njobs : number of numexpr threads and of CPUs for the distances
    log : logger, only its `debug` method is used

    Returns
    -------
    The value computed by `_average` on the analyze file.

    Raises
    ------
    KeyError if `distance` is not one of the supported names.

    """
    dist2fun = {
        'cosine': default_distance,
        'KL': dtw_kl_distance,
        'levenshtein': edit_distance
    }

    # fail fast on an unsupported distance: before any expensive step and
    # before stdout gets redirected
    distance_fun = dist2fun[distance]

    # convert (skipped when the converted file is already in temp_dir)
    log.debug('loading features ...')
    features = os.path.join(temp_dir, 'features.h5')
    if not os.path.isfile(features):
        convert(features_path, h5_filename=features, load=load_fun)

    # avoid annoying log message
    numexpr.set_num_threads(njobs)

    log.debug('computing %s distances ...', distance)
    distance_file = os.path.join(temp_dir, 'distance_{}.h5'.format(task_type))

    # ComplexWarning lives in numpy.exceptions on recent numpy and was
    # removed from the top-level namespace in numpy 2.0
    complex_warning = getattr(np, 'exceptions', np).ComplexWarning

    # ABX Distances prints some messages we do not want to display: send
    # stdout to devnull during the computation. The previous stdout is
    # restored (and devnull closed) even if the computation fails.
    previous_stdout = sys.stdout
    with open(os.devnull, 'w') as devnull, warnings.catch_warnings():
        # inhibit some useless warnings about complex to float conversion
        warnings.filterwarnings("ignore", category=complex_warning)

        sys.stdout = devnull
        try:
            # compute the distances
            ABXpy.distances.distances.compute_distances(features,
                                                        'features',
                                                        task,
                                                        distance_file,
                                                        distance_fun,
                                                        normalized,
                                                        n_cpu=njobs)
        finally:
            sys.stdout = previous_stdout

    log.debug('computing abx score ...')
    # score
    score_file = os.path.join(temp_dir, 'score_{}.h5'.format(task_type))
    score(task, distance_file, score_file)

    # analyze
    analyze_file = os.path.join(temp_dir, 'analyze_{}.csv'.format(task_type))
    analyze(task, score_file, analyze_file)

    # average
    return _average(analyze_file, task_type)
Exemplo n.º 3
0
    # NOTE(review): if `parser` is an argparse.ArgumentParser, this strict
    # parse already exits on unrecognised arguments, so `leftovers` below
    # can never be non-empty -- confirm which behaviour is intended.
    parser.parse_args()
    args, leftovers = parser.parse_known_args()

    print(args.output_name)

    # Scratch directory receiving one .npz file per utterance, and the final
    # output file, both located in the target directory.
    tmp_dir = '{}/tmp'.format(args.target_dir)
    output_path = '{}/{}'.format(args.target_dir, args.output_name)

    # Start from a clean state: drop any previous scratch directory (it is
    # fine if there is none) and any previous output file.
    shutil.rmtree(tmp_dir, ignore_errors=True)

    if os.path.exists(output_path):
        os.remove(output_path)

    os.makedirs(tmp_dir)

    with kaldiio.ReadHelper('scp:{}'.format(args.feats_file)) as reader:
        filenames = []
        # Every entry is saved with the same single timestamp 0.
        times = np.array([0])
        for key, numpy_array in reader:
            filenames.append(key)
            # Add a leading axis and store as float64 before saving.
            ivector_2d = np.expand_dims(numpy_array.astype(np.float64), axis=0)
            np.savez('{}/{}'.format(tmp_dir, key),
                     features=ivector_2d,
                     time=times)
    # NOTE(review): 'aaa' / 'bbb' look like leftover debug output; kept so
    # the script's stdout is unchanged.
    print('aaa')
    any2h5features.convert('{}/'.format(tmp_dir), output_path)
    print(args.target_dir, args.output_name)
    print('bbb')

    # shutil.rmtree('{}/tmp'.format(args.target_dir))
Exemplo n.º 4
0
def _load_distance_function(module_path, function_name):
    """Import the module at `module_path` and return its `function_name`.

    `module_path` is split into a directory and a module name; the
    directory is prepended to `sys.path` so the module can be imported by
    name. The `sys.path` entry is left in place after the import.
    """
    directory, module_name = os.path.split(module_path)
    sys.path.insert(0, directory)
    return getattr(__import__(module_name), function_name)


def fullrun(task,
            data_folder,
            feature_folder,
            h5,
            file_sizes,
            corpus,
            distance,
            outputdir,
            normalized,
            doall=True,
            ncpus=None):
    """Run one ABX task end to end and return its averaged result.

    The steps are: optional conversion of the features to h5, distances,
    scores, analysis, then `avg` on the analysis file. The distance, score
    and analyze files (and the converted feature file when it was produced
    here) are removed before returning, whether or not the run succeeded.

    Parameters:
        task: task configuration; indexed with 'section' and queried
            through `lookup` for file names, task type and defaults.
        data_folder: root folder of the task files; the task file is
            '<data_folder>/test/<corpus>/<file_sizes>s/<file_sizes>s_<type>.abx'.
        feature_folder: folder of the features to convert, or, when `h5` is
            true, the folder containing '<file_sizes>s.h5f'.
        h5: when true, skip the conversion and read the features from
            '<feature_folder>/<file_sizes>s.h5f'.
        file_sizes: used to build the task and h5 feature file names.
        corpus: corpus sub-folder of the task file.
        distance: optional '<module path>.<function name>' string; when
            empty, the module and function come from the task configuration.
        outputdir: folder where the intermediate files are written.
        normalized: forwarded to `distances.compute_distances`.
        doall: unused, kept for backward compatibility.
        ncpus: number of CPUs for the distances; when not set, the 'ncpus'
            entry of the task configuration is used (default 1).

    Returns:
        The value returned by `avg(analyzefilename, task)`.

    Raises:
        AssertionError: if the task file does not exist.
        Exception: any error raised while loading the distance, converting
            the features or running the pipeline is re-raised.
    """
    print("Processing task {}".format(task['section']))

    feature_file = os.path.join(outputdir, lookup('featurefile', task))

    try:
        if distance:
            # Split on the LAST dot only, so that dots inside the module
            # path (e.g. './dist/mydistance.distance') are preserved.
            distancepair = distance.rsplit('.', 1)
            distancemodule = distancepair[0]
            distancefunction = distancepair[1]
        else:
            distancemodule = lookup('distancemodule', task,
                                    os.path.join(CURDIR, 'distance'))
            distancefunction = lookup('distancefunction', task, 'distance')
        distancefun = _load_distance_function(distancemodule,
                                              distancefunction)
    except Exception:
        sys.stderr.write('distance not found\n')
        raise

    distance_file = os.path.join(outputdir, lookup('distancefile', task))
    scorefilename = os.path.join(outputdir, lookup('scorefile', task))

    taskfilename = os.path.join(
        data_folder, 'test', corpus, '{}s'.format(file_sizes),
        '{}s_{}.abx'.format(file_sizes, lookup('type', task)))

    print('Task file is {}'.format(taskfilename))
    # Explicit raise instead of `assert` so the check survives `python -O`;
    # the exception type is kept for existing callers.
    if not os.path.isfile(taskfilename):
        raise AssertionError('Task file unknown')

    analyzefilename = os.path.join(outputdir, lookup('analyzefile', task))
    if not ncpus:
        ncpus = int(lookup('ncpus', task, 1))

    makedirs([feature_file, distance_file, scorefilename, analyzefilename])

    # Preprocessing
    if not h5:
        try:
            print("Preprocessing... Writing the features in h5 format")
            tryremove(feature_file)
            any2h5features.convert(feature_folder,
                                   h5_filename=feature_file,
                                   load=loadfeats)
            # Open in append mode explicitly: h5py >= 3.0 opens files
            # read-only by default, which would make the write below fail.
            with h5py.File(feature_file, 'a') as fh:
                fh.attrs.create('done', True)
        except Exception:
            sys.stderr.write('Error when writing the features from {} to {}\n'
                             'Check the paths availability\n'.format(
                                 os.path.realpath(feature_folder),
                                 os.path.realpath(feature_file)))
            raise
    else:
        feature_file = os.path.join(feature_folder,
                                    '{}s.h5f'.format(file_sizes))

    # computing
    try:
        print("Computing the distances")
        tryremove(distance_file)
        distances.compute_distances(feature_file,
                                    '/features/',
                                    taskfilename,
                                    distance_file,
                                    distancefun,
                                    normalized=normalized,
                                    n_cpu=ncpus)

        tryremove(scorefilename)
        print("Computing the scores")
        score.score(taskfilename, distance_file, scorefilename)

        tryremove(analyzefilename)
        print("Collapsing the results")
        analyze.analyze(taskfilename, scorefilename, analyzefilename)

        # Must run before the cleanup below deletes the analyze file.
        return avg(analyzefilename, task)
    finally:
        tryremove(distance_file)
        tryremove(scorefilename)
        tryremove(analyzefilename)
        # Only remove the feature file when it was generated by this run.
        if not h5:
            tryremove(feature_file)